From 6aed20dfb3fd7889dbf62239c80ecac02140bf89 Mon Sep 17 00:00:00 2001
From: Yulong Ao
Date: Sun, 7 Aug 2022 09:36:53 +0000
Subject: [PATCH 1/3] [Auto Parallel] Add ProcessMesh, DeviceMesh and
 DistributedMapper

---
 paddle/fluid/distributed/CMakeLists.txt       |   1 +
 .../distributed/auto_parallel/CMakeLists.txt  |  40 ++
 .../auto_parallel/auto_parallel.proto         | 173 ++++++++
 .../distributed/auto_parallel/device_mesh.cc  | 398 ++++++++++++++++++
 .../distributed/auto_parallel/device_mesh.h   | 273 ++++++++++++
 .../auto_parallel/device_mesh_test.cc         |  93 ++++
 .../distributed/auto_parallel/dist_mapper.cc  | 146 +++++++
 .../distributed/auto_parallel/dist_mapper.h   |  73 ++++
 .../auto_parallel/dist_mapper_test.cc         |  72 ++++
 .../distributed/auto_parallel/process_mesh.cc | 134 ++++++
 .../distributed/auto_parallel/process_mesh.h  |  94 +++++
 .../auto_parallel/process_mesh_test.cc        |  54 +++
 .../fluid/distributed/auto_parallel/utils.h   | 105 +++++
 13 files changed, 1656 insertions(+)
 create mode 100644 paddle/fluid/distributed/auto_parallel/CMakeLists.txt
 create mode 100644 paddle/fluid/distributed/auto_parallel/auto_parallel.proto
 create mode 100644 paddle/fluid/distributed/auto_parallel/device_mesh.cc
 create mode 100644 paddle/fluid/distributed/auto_parallel/device_mesh.h
 create mode 100644 paddle/fluid/distributed/auto_parallel/device_mesh_test.cc
 create mode 100644 paddle/fluid/distributed/auto_parallel/dist_mapper.cc
 create mode 100644 paddle/fluid/distributed/auto_parallel/dist_mapper.h
 create mode 100644 paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc
 create mode 100644 paddle/fluid/distributed/auto_parallel/process_mesh.cc
 create mode 100644 paddle/fluid/distributed/auto_parallel/process_mesh.h
 create mode 100644 paddle/fluid/distributed/auto_parallel/process_mesh_test.cc
 create mode 100644 paddle/fluid/distributed/auto_parallel/utils.h

diff --git a/paddle/fluid/distributed/CMakeLists.txt b/paddle/fluid/distributed/CMakeLists.txt
index 24e0a8c7a5d9f..b18ed421fcd78 100755
--- a/paddle/fluid/distributed/CMakeLists.txt
+++ b/paddle/fluid/distributed/CMakeLists.txt
@@ -47,3 +47,4 @@ add_subdirectory(ps)
 add_subdirectory(test)
 add_subdirectory(index_dataset)
 add_subdirectory(fleet_executor)
+add_subdirectory(auto_parallel)
diff --git a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
new file mode 100644
index 0000000000000..cfcc12515da91
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
@@ -0,0 +1,40 @@
+cc_library(
+  device_mesh
+  SRCS device_mesh.cc
+  DEPS auto_parallel_proto)
+cc_test(
+  device_mesh_test
+  SRCS device_mesh_test.cc
+  DEPS device_mesh)
+
+cc_library(
+  process_mesh
+  SRCS process_mesh.cc
+  DEPS auto_parallel_proto)
+cc_test(
+  process_mesh_test
+  SRCS process_mesh_test.cc
+  DEPS process_mesh)
+
+# cc_library(
+#   dist_attr
+#   SRCS dist_attr.cc
+#   DEPS process_mesh auto_parallel_proto proto_desc)
+# cc_test(
+#   dist_attr_test
+#   SRCS dist_attr_test.cc
+#   DEPS dist_attr)
+
+cc_library(
+  dist_mapper
+  SRCS dist_mapper.cc
+  DEPS device_mesh auto_parallel_proto)
+cc_test(
+  dist_mapper_test
+  SRCS dist_mapper_test.cc
+  DEPS dist_mapper)
+
+proto_library(auto_parallel_proto SRCS auto_parallel.proto)
+
+# cc_library(auto_parallel DEPS process_mesh device_mesh dist_attr dist_mapper
+# auto_parallel_proto)
diff --git a/paddle/fluid/distributed/auto_parallel/auto_parallel.proto b/paddle/fluid/distributed/auto_parallel/auto_parallel.proto
new file mode 100644
index 0000000000000..eae33e929a2d3
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/auto_parallel.proto
@@ -0,0 +1,173 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+syntax = "proto2";
+
+package paddle.distributed.auto_parallel;
+
+// ProcessMesh is used to organize processes like an n-dimensional array.
+message ProcessMeshProto {
+  // The size of each dimension.
+  repeated int64 shape = 1;
+
+  // These process ids are stored in a row-major way.
+  // There are no duplicate process ids within one process mesh.
+  repeated int64 process_ids = 2;
+
+  // The name of each dimension.
+  repeated string dim_names = 3;
+
+}
+
+// // This distributed attribute describes how to distribute the corresponding tensor,
+// // and stores any other information needed by auto parallel.
+// message TensorDistAttrProto {
+//   // The process mesh where a tensor is distributed.
+//   optional ProcessMeshProto process_mesh = 1;
+//
+//   // The length of dims_mapping is the same as the length of the tensor shape.
+//   // The i-th dimension of the tensor will be sharded by the dims_mapping[i]-th dimension
+//   // of the above process mesh. If dims_mapping[i] is -1, the i-th dimension of the tensor
+//   // will not be sharded. For example, given a tensor shape [2, 6, 12], a process mesh
+//   // shape [2, 3] and a dims_mapping [-1, 1, 0], each sharded tensor will have a shape [2, 2, 6].
+//   repeated int64 dims_mapping = 2;
+//
+//   // The batch dimension of the corresponding tensor.
+//   optional int64 batch_dim = 3;
+//
+//   // If the dynamic_dims[i] is True, the i-th dimension of the corresponding tensor
+//   // is dynamically changed. Otherwise, the i-th dimension of the tensor is statically determined.
+//   repeated bool dynamic_dims = 4;
+// }
+//
+// // This distributed attribute describes how to distribute the corresponding operator,
+// // and stores any other information needed by auto parallel.
+// message OperatorDistAttrProto {
+//   message TensorDistAttrMappingEntryProto {
+//     optional string name = 1;
+//     optional TensorDistAttrProto tensor_dist_attr = 2;
+//   }
+//   // The key of this map is the input tensor name and the value is the distributed attribute
+//   // of the input tensor required by this corresponding operator.
+//   // The distributed attribute of the actual tensor may not be the same as that within
+//   // the distributed attribute of the operator.
+//   repeated TensorDistAttrMappingEntryProto input_dist_attrs = 1;
+//
+//   // The key of this map is the output tensor name and the value is the distributed attribute
+//   // of the output tensor required by this corresponding operator.
+//   // The distributed attribute of the actual tensor may not be the same as that within
+//   // the distributed attribute of the operator.
+//   repeated TensorDistAttrMappingEntryProto output_dist_attrs = 2;
+//
+//   // The process mesh where an op is distributed.
+//   optional ProcessMeshProto process_mesh = 3;
+//
+//   // An operator ideally has a distributed operator which may have multiple distributed implementations.
+//   // This field is usually the same as the operator type. However, some operators such as the element-wise operators
+//   // may share the same distributed operator, so this field is used for this scenario.
+//   optional string impl_type = 4;
+//
+//   // This field tells which distributed implementations of this corresponding operator
+//   // will be selected for the actual computation.
+//   optional int64 impl_idx = 5;
+// }
+
+// This proto describes the capability of one device, such as its computation and memory.
+message DeviceCapabilityProto {
+  optional double single_precision_flops = 1;
+
+  optional double double_precision_flops = 2;
+
+  optional double memory_size_in_bytes = 3;
+
+  optional double clock_rate_in_ghz = 4;
+}
+
+// This proto represents a device.
+message DeviceProto {
+  // The global id of this device within the cluster.
+  optional int64 global_id = 1;
+
+  // The local id of this device within the machine.
+  optional int64 local_id = 2;
+
+  // The id of the machine that owns this device.
+  optional int64 machine_id = 3;
+
+  // The type of this device.
+  optional string type = 4;
+
+  // The capability of this device.
+  optional DeviceCapabilityProto capability = 5;
+}
+
+// This proto describes the capability of the link between two devices.
+message LinkCapabilityProto {
+  optional int64 bandwidth = 1;  // Bytes/s
+  optional int64 latency = 2;
+}
+
+message LinkProto {
+  // The global id of the source device.
+  optional int64 source_id = 1;
+
+  // The global id of the target device.
+  optional int64 target_id = 2;
+
+  // Represents the link type.
+  optional string type = 3;
+
+  // The capability of this link.
+  optional LinkCapabilityProto capability = 4;
+}
+
+// DeviceMesh is used to organize devices like an n-dimensional array.
+message DeviceMeshProto {
+  // The unique name of this mesh.
+  optional string name = 1;
+
+  // The size of each dimension.
+  repeated int64 shape = 2;
+
+  // These device ids are stored in a row-major way.
+  // There are no duplicate device ids within one device mesh.
+  repeated int64 device_ids = 3;
+
+  // The name of each dimension.
+  repeated string dim_names = 4;
+
+  // The devices of this mesh.
+  repeated DeviceProto devices = 5;
+
+  // The links between the devices of this mesh.
+  repeated LinkProto links = 6;
+}
+
+// Record the mapping between the logical processes and the physical devices.
+message DistributedMapperProto {
+  // The device meshes used by this distributed computation,
+  // which may be shared by multiple process meshes.
+  repeated DeviceMeshProto device_meshes = 1;
+
+  message MapperEntryProto {
+    optional int64 process_id = 1;
+    optional string device_mesh_name = 2;
+    repeated int64 device_ids = 3;
+  }
+
+  // The mapping from process ids to device ids.
+  // It is also possible for one process to use multiple devices.
+  // It is also possible for one device to be shared by multiple processes.
+  repeated MapperEntryProto process_id_to_device_ids = 2;
+}
diff --git a/paddle/fluid/distributed/auto_parallel/device_mesh.cc b/paddle/fluid/distributed/auto_parallel/device_mesh.cc
new file mode 100644
index 0000000000000..6bf26ad6f74e4
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/device_mesh.cc
@@ -0,0 +1,398 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+#include <iterator>
+
+#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
+#include "paddle/fluid/distributed/auto_parallel/utils.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+std::string DeviceCapability::to_string() const {
+  std::string str;
+  str += "{sflops: " + to_string_with_precision(single_precision_flops) + ", ";
+  str += "dflops: " + to_string_with_precision(double_precision_flops) + ", ";
+  str += "memory: " + to_string_with_precision(memory_size_in_bytes) + ", ";
+  str += "rate: " + to_string_with_precision(clock_rate_in_ghz) + "}";
+  return str;
+}
+
+DeviceCapability DeviceCapability::from_proto(
+    const DeviceCapabilityProto &proto) {
+  DeviceCapability capability;
+  capability.single_precision_flops = proto.single_precision_flops();
+  capability.double_precision_flops = proto.double_precision_flops();
+  capability.memory_size_in_bytes = proto.memory_size_in_bytes();
+  capability.clock_rate_in_ghz = proto.clock_rate_in_ghz();
+  return capability;
+}
+
+DeviceCapabilityProto DeviceCapability::to_proto() const {
+  DeviceCapabilityProto proto;
+  proto.set_single_precision_flops(single_precision_flops);
+  proto.set_double_precision_flops(double_precision_flops);
+  proto.set_memory_size_in_bytes(memory_size_in_bytes);
+  proto.set_clock_rate_in_ghz(clock_rate_in_ghz);
+  return proto;
+}
+
+std::string Device::to_string() const {
+  std::string str = "{global_id: " + std::to_string(global_id_) + ", ";
+  str += "local_id: " + std::to_string(local_id_) + ", ";
+  str += "machine_id: " + std::to_string(machine_id_) + ", ";
+  str += "type: " + type_ + ", ";
+  str += "capability: " + capability_.to_string() + "}";
+  return str;
+}
+
+Device Device::from_proto(const DeviceProto &proto) {
+  Device device;
+  device.global_id_ = proto.global_id();
+  device.local_id_ = proto.local_id();
+  device.machine_id_ = proto.machine_id();
+  device.type_ = proto.type();
+  device.capability_ = DeviceCapability::from_proto(proto.capability());
+  return device;
+}
+
+DeviceProto Device::to_proto() const {
+  DeviceProto proto;
+  proto.set_global_id(global_id_);
+  proto.set_local_id(local_id_);
+  proto.set_machine_id(machine_id_);
+  proto.set_type(type_);
+  proto.mutable_capability()->CopyFrom(capability_.to_proto());
+  return proto;
+}
+
+bool operator==(const Device &lhs, const Device &rhs) {
+  if (lhs.global_id() != rhs.global_id()) {
+    return false;
+  }
+  if (lhs.local_id() != rhs.local_id()) {
+    return false;
+  }
+  if (lhs.machine_id() != rhs.machine_id()) {
+    return false;
+  }
+  if (lhs.type() != rhs.type()) {
+    return false;
+  }
+  return true;
+}
+
+std::string LinkCapability::to_string() const {
+  std::string str;
+  str += "{bandwidth: " + to_string_with_precision(bandwidth) + ",";
+  str += "latency: " + to_string_with_precision(latency) + "}";
+  return str;
+}
+
+LinkCapability LinkCapability::from_proto(const LinkCapabilityProto &proto) {
+  LinkCapability capability;
+  capability.bandwidth = proto.bandwidth();
+  capability.latency = proto.latency();
+  return capability;
+}
+
+LinkCapabilityProto LinkCapability::to_proto() const {
+  LinkCapabilityProto proto;
+  proto.set_bandwidth(bandwidth);
+  proto.set_latency(latency);
+  return proto;
+}
+
+std::string Link::to_string() const {
+  std::string str = "{source_id:" + std::to_string(source_id_) + ",";
+  str += "target_id:" + std::to_string(target_id_) + ",";
+  str += "type:" + type_ + ",";
+  str += "capability:" + capability_.to_string() + "}";
+  return str;
+}
+
+Link Link::from_proto(const LinkProto &proto) {
+  Link link;
+  link.source_id_ = proto.source_id();
+  link.target_id_ = proto.target_id();
+  link.type_ = proto.type();
+  link.capability_ = LinkCapability::from_proto(proto.capability());
+  return link;
+}
+
+LinkProto Link::to_proto() const {
+  LinkProto proto;
+  proto.set_source_id(source_id_);
+  proto.set_target_id(target_id_);
+  proto.set_type(type_);
+  proto.mutable_capability()->CopyFrom(capability_.to_proto());
+  return proto;
+}
+
+bool operator==(const Link &lhs, const Link &rhs) {
+  if (lhs.source_id() != rhs.source_id()) {
+    return false;
+  }
+  if (lhs.target_id() != rhs.target_id()) {
+    return false;
+  }
+  if (lhs.type() != rhs.type()) {
+    return false;
+  }
+  return true;
+}
+
+bool Machine::contains(int64_t device_id) const {
+  return devices_.count(device_id) == 1;
+}
+
+void Machine::add_device(const Device &device) {
+  if (id() == -1) {
+    set_id(device.machine_id());
+  } else {
+    PADDLE_ENFORCE_EQ(device.machine_id(),
+                      id(),
+                      platform::errors::InvalidArgument(
+                          "The machine id [%d] of the device should be equal "
+                          "to this machine id [%d].",
+                          device.machine_id(),
+                          id_));
+  }
+  devices_[device.global_id()] = &device;
+}
+
+void Machine::add_link(const Link &link) {
+  PADDLE_ENFORCE_EQ(contains(link.source_id()),
+                    true,
+                    platform::errors::InvalidArgument(
+                        "The source device id of the added link [%s] "
+                        "cannot be found in the device_ids. Please add the "
+                        "source device before adding this link.",
+                        std::to_string(link.source_id())));
+  links_[link.source_id()][link.target_id()] = &link;
+}
+
+std::string Machine::to_string() const {
+  std::string str = "{devices: [";
+  for (const auto &device : devices_) {
+    str += device.second->to_string() + ", ";
+  }
+  str.replace(str.size() - 2, 2, "], ");
+
+  str += "links: [";
+  for (const auto &item : links_) {
+    str += "{";
+    str += "source_id: " + std::to_string(item.first) + ", neighbors: [";
+    for (const auto &link : item.second) {
+      str += link.second->to_string() + ", ";
+    }
+    str.replace(str.size() - 2, 2, "]}, ");
+  }
+  str.replace(str.size() - 4, 4, "]}");
+  return str;
+}
+
+DeviceMesh::DeviceMesh(const std::string &name,
+                       const std::vector<int64_t> &shape,
+                       const std::vector<int64_t> &device_ids,
+                       const std::vector<std::string> &dim_names) {
+  name_ = name;
+  shape_ = shape;
+  int64_t size = this->size();
+
+  PADDLE_ENFORCE_EQ(size,
+                    device_ids.size(),
+                    platform::errors::InvalidArgument(
+                        "The size %d of this device mesh must be "
+                        "equal to the size %d of its device ids.",
+                        size,
+                        device_ids.size()));
+  PADDLE_ENFORCE_EQ(
+      has_duplicates(device_ids),
+      false,
+      platform::errors::InvalidArgument("The device ids [%s] must be unique.",
+                                        str_join(device_ids)));
+  device_ids_ = device_ids;
+
+  PADDLE_ENFORCE_EQ(
+      shape_.size(),
+      dim_names.size(),
+      platform::errors::InvalidArgument(
+          "The size %d of mesh shape must be equal to the size %d "
+          "of the dimension names.",
+          shape_.size(),
+          dim_names.size()));
+  PADDLE_ENFORCE_EQ(has_duplicates(dim_names),
+                    false,
+                    platform::errors::InvalidArgument(
+                        "The names [%s] of each dimension must be unique.",
+                        str_join(dim_names)));
+  dim_names_ = dim_names;
+}
+
+int64_t DeviceMesh::size() const {
+  if (shape_.empty()) return 0;
+  int64_t size = 1;
+  for (const int64_t dim_size : shape_) size *= dim_size;
+  return size;
+}
+
+bool DeviceMesh::contains(int64_t device_id) const {
+  auto result =
+      std::find(std::begin(device_ids_), std::end(device_ids_), device_id);
+  return result != std::end(device_ids_);
+}
+
+void DeviceMesh::add_device(const Device &device) {
+  PADDLE_ENFORCE_EQ(
+      contains(device.global_id()),
+      true,
+      platform::errors::InvalidArgument(
+          "The added device id [%s] cannot be found in the device_ids.",
+          std::to_string(device.global_id())));
+  // Operator [] will create a new object if it cannot find one.
+  // So we add the default constructor for Device and Machine
+  // to make sure the new object can be created.
+  devices_[device.global_id()] = device;
+  machines_[device.machine_id()].add_device(devices_[device.global_id()]);
+}
+
+void DeviceMesh::add_link(const Link &link) {
+  PADDLE_ENFORCE_EQ(
+      contains(link.source_id()),
+      true,
+      platform::errors::InvalidArgument("The source id of the added link [%s] "
+                                        "cannot be found in the device_ids.",
+                                        std::to_string(link.source_id())));
+  PADDLE_ENFORCE_EQ(
+      contains(link.target_id()),
+      true,
+      platform::errors::InvalidArgument("The target id of the added link [%s] "
+                                        "cannot be found in the device_ids.",
+                                        std::to_string(link.target_id())));
+  // Operator [] will create a new object if it cannot find one.
+  // So we add the default constructor for Device and Machine
+  // to make sure the new object can be created.
+  links_[link.source_id()][link.target_id()] = link;
+  const Device &source_device = devices_[link.source_id()];
+  machines_[source_device.machine_id()].add_link(
+      links_[link.source_id()][link.target_id()]);
+}
+
+std::string DeviceMesh::to_string() const {
+  std::string mesh_str = "{name: " + name_ + ", ";
+  mesh_str += "shape: [" + str_join(shape_) + "], ";
+  mesh_str += "device_ids: [" + str_join(device_ids_) + "], ";
+  mesh_str += "dim_names: [" + str_join(dim_names_) + "], ";
+  mesh_str += "\ndevices: [\n";
+  for (const auto &device : devices_) {
+    mesh_str += "  " + device.second.to_string() + ",\n";
+  }
+  mesh_str.replace(mesh_str.size() - 2, 2, "],");
+
+  mesh_str += "\nlinks: [\n";
+  for (const auto &item : links_) {
+    mesh_str += "  {";
+    mesh_str += "source_id: " + std::to_string(item.first) + ", neighbors: [";
+    for (const auto &link : item.second) {
+      mesh_str += link.second.to_string() + ", ";
+    }
+    mesh_str.replace(mesh_str.size() - 2, 2, "]},\n");
+  }
+  mesh_str.replace(mesh_str.size() - 4, 4, "]}");
+  return mesh_str;
+}
+
+DeviceMesh DeviceMesh::from_proto(const DeviceMeshProto &proto) {
+  DeviceMesh mesh;
+
+  mesh.name_ = proto.name();
+
+  mesh.shape_.resize(proto.shape_size());
+  for (int64_t i = 0; i < proto.shape_size(); ++i) {
+    mesh.shape_[i] = proto.shape(i);
+  }
+
+  mesh.device_ids_.resize(proto.device_ids_size());
+  for (int64_t i = 0; i < proto.device_ids_size(); ++i) {
+    mesh.device_ids_[i] = proto.device_ids(i);
+  }
+
+  mesh.dim_names_.resize(proto.dim_names_size());
+  for (int64_t i = 0; i < proto.dim_names_size(); ++i) {
+    mesh.dim_names_[i] = proto.dim_names(i);
+  }
+
+  for (int64_t i = 0; i < proto.devices_size(); ++i) {
+    mesh.add_device(Device::from_proto(proto.devices(i)));
+  }
+
+  for (int64_t i = 0; i < proto.links_size(); ++i) {
+    mesh.add_link(Link::from_proto(proto.links(i)));
+  }
+
+  return mesh;
+}
+
+DeviceMeshProto DeviceMesh::to_proto() const {
+  DeviceMeshProto proto;
+
+  proto.set_name(name_);
+
+  for (const auto &i : shape_) {
+    proto.add_shape(i);
+  }
+
+  for (const auto &i : device_ids_) {
+    proto.add_device_ids(i);
+  }
+
+  for (const auto &i : dim_names_) {
+    proto.add_dim_names(i);
+  }
+
+  for (const auto &device : devices_) {
+    proto.mutable_devices()->Add()->CopyFrom(device.second.to_proto());
+  }
+
+  for (const auto &neighbors : links_) {
+    for (const auto &link : neighbors.second) {
+      proto.mutable_links()->Add()->CopyFrom(link.second.to_proto());
+    }
+  }
+
+  return proto;
+}
+
+bool operator==(const DeviceMesh &lhs, const DeviceMesh &rhs) {
+  // Use the unique name to do the fast comparison
+  if (lhs.name() != rhs.name()) {
+    return false;
+  }
+  return true;
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/device_mesh.h b/paddle/fluid/distributed/auto_parallel/device_mesh.h
new file mode 100644
index 0000000000000..15ec50f546d30
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/device_mesh.h
@@ -0,0 +1,273 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
+#include "paddle/fluid/distributed/auto_parallel/utils.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+struct DeviceCapability {
+  double single_precision_flops = 0.0;
+  double double_precision_flops = 0.0;
+  double memory_size_in_bytes = 0.0;
+  double clock_rate_in_ghz = 0.0;
+
+  // DeviceCapability from_string(const std::string& str);
+  std::string to_string() const;
+
+  static DeviceCapability from_proto(const DeviceCapabilityProto& proto);
+  DeviceCapabilityProto to_proto() const;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const DeviceCapability& obj) {
+  os << obj.to_string();
+  return os;
+}
+
+class Device {
+ public:
+  Device() = default;
+  Device(int64_t global_id,
+         int64_t local_id,
+         int64_t machine_id,
+         const std::string& type)
+      : global_id_(global_id),
+        local_id_(local_id),
+        machine_id_(machine_id),
+        type_(type) {}
+
+  int64_t global_id() const { return global_id_; }
+  int64_t local_id() const { return local_id_; }
+  int64_t machine_id() const { return machine_id_; }
+  const std::string& type() const { return type_; }
+
+  const DeviceCapability& capability() const { return capability_; }
+  void set_capability(const DeviceCapability& capability) {
+    capability_ = capability;
+  }
+
+  // Device from_string(const std::string& mesh_str);
+  std::string to_string() const;
+
+  static Device from_proto(const DeviceProto& proto);
+  DeviceProto to_proto() const;
+
+ private:
+  int64_t global_id_;
+  int64_t local_id_;
+  int64_t machine_id_;
+  std::string type_;
+  DeviceCapability capability_;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Device& obj) {
+  os << obj.to_string();
+  return os;
+}
+
+bool operator==(const Device& lhs, const Device& rhs);
+
+inline bool operator!=(const Device& lhs, const Device& rhs) {
+  return !operator==(lhs, rhs);
+}
+
+struct LinkCapability {
+  double bandwidth = 0.0;  // Bytes/s
+  double latency = 0.0;
+
+  // LinkCapability from_string(const std::string& str);
+  std::string to_string() const;
+
+  static LinkCapability from_proto(const LinkCapabilityProto& proto);
+  LinkCapabilityProto to_proto() const;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const LinkCapability& obj) {
+  os << obj.to_string();
+  return os;
+}
+
+class Link {
+ public:
+  Link() = default;
+
+  Link(int64_t source_id, int64_t target_id, const std::string& type)
+      : source_id_(source_id), target_id_(target_id), type_(type) {}
+
+  int64_t source_id() const { return source_id_; }
+  int64_t target_id() const { return target_id_; }
+  const std::string& type() const { return type_; }
+
+  const LinkCapability& capability() const { return capability_; }
+  void set_capability(const LinkCapability& capability) {
+    capability_ = capability;
+  }
+
+  // Link from_string(const std::string& str);
+  std::string to_string() const;
+
+  static Link from_proto(const LinkProto& proto);
+  LinkProto to_proto() const;
+
+ private:
+  int64_t source_id_;
+  int64_t target_id_;
+  std::string type_;
+  LinkCapability capability_;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Link& obj) {
+  os << obj.to_string();
+  return os;
+}
+
+bool operator==(const Link& lhs, const Link& rhs);
+
+inline bool operator!=(const Link& lhs, const Link& rhs) {
+  return !operator==(lhs, rhs);
+}
+
+class Machine {
+ public:
+  Machine() = default;
+
+  explicit Machine(int64_t id) : id_(id) {}
+
+  int64_t id() const { return id_; }
+
+  void set_id(int64_t id) { id_ = id; }
+
+  bool contains(int64_t device_id) const;
+
+  void add_device(const Device& device);
+
+  void add_link(const Link& link);
+
+  // Machine from_string(const std::string& str);
+  std::string to_string() const;
+
+ private:
+  int64_t id_ = -1;
+  std::unordered_map<int64_t, const Device*> devices_;
+  std::unordered_map<int64_t, std::unordered_map<int64_t, const Link*>>
+      links_;
+};
+
+class DeviceMesh {
+ public:
+  DeviceMesh() = default;
+
+  DeviceMesh(const std::string& name,
+             const std::vector<int64_t>& shape,
+             const std::vector<int64_t>& device_ids,
+             const std::vector<std::string>& dim_names);
+
+  const std::string& name() const { return name_; }
+
+  void set_name(const std::string& name) { name_ = name; }
+
+  const std::vector<int64_t>& shape() const { return shape_; }
+
+  const std::vector<int64_t>& device_ids() const { return device_ids_; }
+
+  const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+  std::string device_type() const {
+    if (empty()) return std::string();
+    return std::begin(devices_)->second.type();
+  }
+
+  const std::unordered_map<int64_t, Device>& devices() const {
+    return devices_;
+  }
+
+  const std::unordered_map<int64_t, std::unordered_map<int64_t, Link>>& links()
+      const {
+    return links_;
+  }
+
+  const Device& device(int64_t global_id) const {
+    return devices_.at(global_id);
+  }
+
+  const Link& link(int64_t source_id, int64_t target_id) const {
+    return links_.at(source_id).at(target_id);
+  }
+
+  int64_t size() const;
+  int64_t ndim() const { return shape_.size(); }
+
+  int64_t dim_size(int64_t dim) const {
+    int64_t cdim = canonical_dim(dim, shape_.size());
+    return shape_[cdim];
+  }
+
+  int64_t dim_size(const std::string& dim_name) const {
+    for (std::size_t i = 0; i < dim_names_.size(); ++i) {
+      if (dim_names_[i] == dim_name) {
+        return shape_[i];
+      }
+    }
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Cannot find the dimension of %s in this device mesh.", dim_name));
+  }
+
+  bool empty() const { return (shape_.empty() || device_ids_.empty()); }
+  bool contains(int64_t device_id) const;
+
+  void add_device(const Device& device);
+  void add_link(const Link& link);
+
+  // DeviceMesh from_string(const std::string& mesh_str);
+  std::string to_string() const;
+
+  static DeviceMesh from_proto(const DeviceMeshProto& proto);
+  DeviceMeshProto to_proto() const;
+
+ private:
+  std::string name_;
+  std::vector<int64_t> shape_;
+  std::vector<int64_t> device_ids_;
+  std::vector<std::string> dim_names_;
+  std::unordered_map<int64_t, Device> devices_;
+  std::unordered_map<int64_t, std::unordered_map<int64_t, Link>> links_;
+  std::unordered_map<int64_t, Machine> machines_;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const DeviceMesh& obj) {
+  os << obj.to_string();
+  return os;
+}
+
+bool operator==(const DeviceMesh& lhs, const DeviceMesh& rhs);
+
+inline bool operator!=(const DeviceMesh& lhs, const DeviceMesh& rhs) {
+  return !operator==(lhs, rhs);
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/device_mesh_test.cc b/paddle/fluid/distributed/auto_parallel/device_mesh_test.cc
new file mode 100644
index 0000000000000..bdfc13baa424d
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/device_mesh_test.cc
@@ -0,0 +1,93 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
+#include <map>
+#include <sstream>
+#include "gtest/gtest.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+TEST(DeviceMesh, Ctor) {
+  std::vector<int64_t> shape = {2, 3};
+  std::vector<int64_t> device_ids = {0, 1, 2, 3, 4, 5};
+  std::vector<std::string> dim_names = {"x", "y"};
+  std::string device_type = "GPU";
+  int64_t size = shape[0] * shape[1];
+
+  DeviceMesh device_mesh("mesh", shape, device_ids, dim_names);
+  for (int64_t i = 0; i < shape[0]; ++i) {
+    for (int64_t j = 0; j < shape[1]; ++j) {
+      int64_t global_id = i * shape[1] + j;
+      int64_t local_id = j;
+      int64_t machine_id = i;
+      device_mesh.add_device(
+          Device(global_id, local_id, machine_id, device_type));
+    }
+  }
+  for (int64_t i = 0; i < size; ++i) {
+    for (int64_t j = 0; j < size; ++j) {
+      device_mesh.add_link(Link(i, j, "NVL"));
+    }
+  }
+
+  EXPECT_EQ(device_mesh.name(), "mesh");
+  EXPECT_EQ(device_mesh.shape(), shape);
+  EXPECT_EQ(device_mesh.device_ids(), device_ids);
+  EXPECT_EQ(device_mesh.dim_names()[0], "x");
+  EXPECT_EQ(device_mesh.dim_names()[1], "y");
+  EXPECT_EQ(device_mesh.device_type(), device_type);
+  EXPECT_EQ(device_mesh.size(), size);
+  EXPECT_EQ(device_mesh.ndim(), static_cast<int64_t>(shape.size()));
+  EXPECT_EQ(device_mesh.dim_size(0), shape[0]);
+  EXPECT_EQ(device_mesh.dim_size(-1), shape[1]);
+  EXPECT_EQ(device_mesh.dim_size("x"), shape[0]);
+  EXPECT_EQ(device_mesh.dim_size("y"), shape[1]);
+  EXPECT_EQ(device_mesh.empty(), false);
+  EXPECT_EQ(device_mesh.contains(0), true);
+  EXPECT_EQ(device_mesh.contains(6), false);
+  EXPECT_EQ(device_mesh.device(3).global_id(), 3);
+  EXPECT_EQ(device_mesh.device(3).local_id(), 0);
+  EXPECT_EQ(device_mesh.device(3).machine_id(), 1);
+  EXPECT_EQ(device_mesh.device(3).type(), "GPU");
+  EXPECT_EQ(device_mesh.link(3, 4).source_id(), 3);
+  EXPECT_EQ(device_mesh.link(3, 4).target_id(), 4);
+  EXPECT_EQ(device_mesh.link(3, 4).type(), "NVL");
+  for (int64_t i = 0; i < shape[0]; ++i) {
+    for (int64_t j = 0; j < shape[1]; ++j) {
+      int64_t global_id = i * shape[1] + j;
+      int64_t local_id = j;
+      int64_t machine_id = i;
+      auto device = device_mesh.devices().at(global_id);
+      EXPECT_EQ(device, Device(global_id, local_id, machine_id, device_type));
+    }
+  }
+  for (int64_t i = 0; i < size; ++i) {
+    for (int64_t j = 0; j < size; ++j) {
+      EXPECT_EQ(device_mesh.links().at(i).at(j), Link(i, j, "NVL"));
+    }
+  }
+  std::stringstream sstream;
+  sstream << device_mesh;
+  EXPECT_EQ(sstream.str(), device_mesh.to_string());
+  auto proto = device_mesh.to_proto();
+  DeviceMesh new_device_mesh = DeviceMesh::from_proto(proto);
+  EXPECT_EQ(device_mesh, new_device_mesh);
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/dist_mapper.cc b/paddle/fluid/distributed/auto_parallel/dist_mapper.cc
new file mode 100644
index 0000000000000..d0995604522e5
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/dist_mapper.cc
@@ -0,0 +1,146 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+
+#include "paddle/fluid/distributed/auto_parallel/dist_mapper.h"
+#include "paddle/fluid/distributed/auto_parallel/utils.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+void DistributedMapper::set_process_id_to_device_ids(
+    const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
+        process_id_to_device_ids) {
+  std::vector<std::string> device_mesh_names;
+  for (const auto& item : device_meshes_) {
+    device_mesh_names.push_back(item.first);
+  }
+  for (const auto& item : process_id_to_device_ids) {
+    PADDLE_ENFORCE_GE(
+        item.first,
+        0,
+        platform::errors::InvalidArgument(
+            "The process id %d must be greater than or equal to 0.",
+            item.first));
+    std::string device_mesh_name = item.second.first;
+    const std::vector<int64_t>& device_ids = item.second.second;
+    PADDLE_ENFORCE_EQ(
+        device_meshes_.count(device_mesh_name),
+        1,
+        platform::errors::InvalidArgument(
+            "Cannot find the device mesh %s in the device mesh names [%s].",
+            device_mesh_name,
+            str_join(device_mesh_names)));
+    PADDLE_ENFORCE_EQ(
+        has_duplicates(device_ids),
+        false,
+        platform::errors::InvalidArgument(
+            "The mapped device ids [%s] of process %d must be unique.",
+            str_join(device_ids),
+            item.first));
+    const DeviceMesh& device_mesh = device_meshes_[device_mesh_name];
+    const std::vector<int64_t> cur_device_ids = device_mesh.device_ids();
+    for (int64_t device_id : device_ids) {
+      bool found =
+          std::find(cur_device_ids.begin(), cur_device_ids.end(), device_id) !=
+          cur_device_ids.end();
+      PADDLE_ENFORCE_EQ(
+          found,
+          true,
+          platform::errors::InvalidArgument(
+              "The device id %d cannot be found in the device mesh [%s].",
+              device_id,
+              str_join(cur_device_ids)));
+    }
+  }
+  process_id_to_device_ids_ = process_id_to_device_ids;
+}
+
+DistributedMapper DistributedMapper::from_proto(
+    const DistributedMapperProto& proto) {
+  DistributedMapper dist_mapper;
+  for (int64_t i = 0; i < proto.device_meshes_size(); ++i) {
+    dist_mapper.device_meshes_[proto.device_meshes(i).name()] =
+        DeviceMesh::from_proto(proto.device_meshes(i));
+  }
+  for (int64_t i = 0; i < proto.process_id_to_device_ids_size(); ++i) {
+    int64_t process_id = proto.process_id_to_device_ids(i).process_id();
+    std::string device_mesh_name =
+        proto.process_id_to_device_ids(i).device_mesh_name();
+    std::vector<int64_t> device_ids;
+    int64_t num_devices = proto.process_id_to_device_ids(i).device_ids_size();
+    for (int64_t j = 0; j < num_devices; ++j) {
+      device_ids.push_back(proto.process_id_to_device_ids(i).device_ids(j));
+    }
+    dist_mapper.process_id_to_device_ids_[process_id].first = device_mesh_name;
+    dist_mapper.process_id_to_device_ids_[process_id].second = device_ids;
+  }
+  return dist_mapper;
+}
+
+DistributedMapperProto DistributedMapper::to_proto() const {
+  DistributedMapperProto proto;
+  for (const auto& item : device_meshes_) {
+    proto.mutable_device_meshes()->Add()->CopyFrom(item.second.to_proto());
+  }
+  for (const auto& outer : process_id_to_device_ids_) {
+    auto proto_item = proto.mutable_process_id_to_device_ids()->Add();
+    proto_item->set_process_id(outer.first);
+    proto_item->set_device_mesh_name(outer.second.first);
+    for (const auto& inner : outer.second.second) {
+      proto_item->add_device_ids(inner);
+    }
+  }
+  return proto;
+}
+
+std::string DistributedMapper::to_string() const {
+  std::string mapper_str = "{device_meshes: [";
+  for (const auto& item : device_meshes_) {
+    mapper_str += item.second.to_string() + ", ";
+  }
+  mapper_str.replace(mapper_str.size() - 2, 2, "]");
+
+  mapper_str += "\nprocess_id_to_device_ids: [";
+  for (const auto& item : process_id_to_device_ids_) {
+    mapper_str += "{";
+    mapper_str +=
+        "process_id: " + std::to_string(item.first) + ", device_ids: [";
+    for (const auto& device_id : item.second.second) {
+      mapper_str +=
+          "{" + item.second.first + ", " + std::to_string(device_id) + "}, ";
+    }
+    mapper_str.replace(mapper_str.size() - 2, 2, "]");
+    mapper_str += "}, ";
+  }
+  mapper_str.replace(mapper_str.size() - 2, 2, "]");
+  mapper_str += "}";
+  return mapper_str;
+}
+
+bool operator==(const DistributedMapper& lhs, const DistributedMapper& rhs) {
+  if (lhs.device_meshes() != rhs.device_meshes()) {
+    return false;
+  }
+  if (lhs.process_id_to_device_ids() != rhs.process_id_to_device_ids()) {
+    return false;
+  }
+  return true;
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/dist_mapper.h b/paddle/fluid/distributed/auto_parallel/dist_mapper.h
new file mode 100644
index 0000000000000..bd7f9790ad69f
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/dist_mapper.h
@@ -0,0 +1,73 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+
+#include <map>
+
+#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
+#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
+#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+class DistributedMapper {
+ public:
+  DistributedMapper() = default;
+
+  const std::map<std::string, DeviceMesh>& device_meshes() const {
+    return device_meshes_;
+  }
+
+  const DeviceMesh& device_mesh(const std::string& name) const {
+    return device_meshes_.at(name);
+  }
+
+  void add_device_mesh(const DeviceMesh& device_mesh) {
+    device_meshes_[device_mesh.name()] = device_mesh;
+  }
+
+  const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
+  process_id_to_device_ids() const {
+    return process_id_to_device_ids_;
+  }
+
+  void set_process_id_to_device_ids(
+      const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
+          process_id_to_device_ids);
+
+  // DistributedMapper from_string(const std::string& mapper_str);
+  std::string to_string() const;
+
+  static DistributedMapper from_proto(const DistributedMapperProto& proto);
+  DistributedMapperProto to_proto() const;
+
+ private:
+  std::map<std::string, DeviceMesh> device_meshes_;
+  std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>
+      process_id_to_device_ids_;
+};
+
+bool operator==(const DistributedMapper& lhs, const DistributedMapper& rhs);
+
+inline std::ostream& operator<<(std::ostream& os,
+                                const DistributedMapper& obj) {
+  os << obj.to_string();
+  return os;
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc b/paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc
new file mode 100644
index 0000000000000..d427b9cbb09ed
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc
@@ -0,0 +1,72 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/distributed/auto_parallel/dist_mapper.h"
+#include <map>
+#include <sstream>
+#include "gtest/gtest.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+TEST(DistributedMapper, Ctor) {
+  std::vector<int64_t> shape = {2, 3};
+  std::vector<int64_t> device_ids = {0, 1, 2, 3, 4, 5};
+  std::vector<std::string> dim_names = {"x", "y"};
+  std::string device_type = "GPU";
+  int64_t size = shape[0] * shape[1];
+
+  DeviceMesh device_mesh("device_mesh", shape, device_ids, dim_names);
+  for (int64_t i = 0; i < shape[0]; ++i) {
+    for (int64_t j = 0; j < shape[1]; ++j) {
+      int64_t global_id = i * shape[1] + j;
+      int64_t local_id = j;
+      int64_t machine_id = i;
+      device_mesh.add_device(
+          Device(global_id, local_id, machine_id, device_type));
+    }
+  }
+  for (int64_t i = 0; i < size; ++i) {
+    for (int64_t j = 0; j < size; ++j) {
+      device_mesh.add_link(Link(i, j, "NVL"));
+    }
+  }
+
+  DistributedMapper dist_mapper;
+  dist_mapper.add_device_mesh(device_mesh);
+  std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>
+      process_id_to_device_ids;
+  process_id_to_device_ids[0] = {"device_mesh", {5}};
+  process_id_to_device_ids[1] = {"device_mesh", {4}};
+  process_id_to_device_ids[2] = {"device_mesh", {3}};
+  process_id_to_device_ids[3] = {"device_mesh", {2}};
+  process_id_to_device_ids[4] = {"device_mesh", {1}};
+  process_id_to_device_ids[5] = {"device_mesh", {0}};
+  dist_mapper.set_process_id_to_device_ids(process_id_to_device_ids);
+
+  EXPECT_EQ(dist_mapper.device_meshes().at("device_mesh"), device_mesh);
+  EXPECT_EQ(dist_mapper.device_mesh("device_mesh"), device_mesh);
+  EXPECT_EQ(dist_mapper.process_id_to_device_ids(), process_id_to_device_ids);
+  std::stringstream sstream;
+  sstream << dist_mapper;
+  EXPECT_EQ(sstream.str(), dist_mapper.to_string());
+  auto proto = dist_mapper.to_proto();
+  DistributedMapper new_dist_mapper = DistributedMapper::from_proto(proto);
+  EXPECT_EQ(dist_mapper, new_dist_mapper);
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/process_mesh.cc b/paddle/fluid/distributed/auto_parallel/process_mesh.cc
new file mode 100644
index 0000000000000..dda2873768997
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/process_mesh.cc
@@ -0,0 +1,134 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+#include <iterator>
+
+#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
+#include "paddle/fluid/distributed/auto_parallel/utils.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+ProcessMesh::ProcessMesh(const std::vector<int64_t> &shape,
+                         const std::vector<int64_t> &process_ids,
+                         const std::vector<std::string> &dim_names) {
+  shape_ = shape;
+  int64_t size = this->size();
+  PADDLE_ENFORCE_EQ(
+      size,
+      process_ids.size(),
+      platform::errors::InvalidArgument("The size %d of this process mesh "
+                                        "must be equal to the size %d of its "
+                                        "process ids.",
+                                        size,
+                                        process_ids.size()));
+  PADDLE_ENFORCE_EQ(
+      has_duplicates(process_ids),
+      false,
+      platform::errors::InvalidArgument("The process ids [%s] must be unique.",
+                                        str_join(process_ids)));
+  process_ids_ = process_ids;
+
+  PADDLE_ENFORCE_EQ(shape_.size(),
+                    dim_names.size(),
+                    platform::errors::InvalidArgument(
+                        "The size %d of mesh shape must be equal to the size "
+                        "%d of the dimension names.",
+                        shape_.size(),
+                        dim_names.size()));
+  PADDLE_ENFORCE_EQ(has_duplicates(dim_names),
+                    false,
+                    platform::errors::InvalidArgument(
+                        "The names [%s] of each dimension must be unique.",
+                        str_join(dim_names)));
+  dim_names_ = dim_names;
+}
+
+int64_t ProcessMesh::size() const {
+  if (shape_.empty()) return 0;
+  int64_t size = 1;
+  for (const int64_t dim_size : shape_) size *= dim_size;
+  return size;
+}
+
+bool ProcessMesh::contains(int64_t process_id) const {
+  auto result =
+      std::find(std::begin(process_ids_), std::end(process_ids_), process_id);
+  return result != std::end(process_ids_);
+}
+
+std::string ProcessMesh::to_string() const {
+  std::string mesh_str = "{shape: [" + str_join(shape_) + "], ";
+  mesh_str += "process_ids: [" + str_join(process_ids_) + "], ";
+  mesh_str += "dim_names: [" + str_join(dim_names_) + "]}";
+  return mesh_str;
+}
+
+ProcessMesh ProcessMesh::from_proto(const ProcessMeshProto &proto) {
+  ProcessMesh mesh;
+
+  mesh.shape_.resize(proto.shape_size());
+  for (int64_t i = 0; i < proto.shape_size(); ++i) {
+    mesh.shape_[i] = proto.shape(i);
+  }
+
+  mesh.process_ids_.resize(proto.process_ids_size());
+  for (int64_t i = 0; i < proto.process_ids_size(); ++i) {
+    mesh.process_ids_[i] = proto.process_ids(i);
+  }
+
+  mesh.dim_names_.resize(proto.dim_names_size());
+  for (int64_t i = 0; i < proto.dim_names_size(); ++i) {
+    mesh.dim_names_[i] = proto.dim_names(i);
+  }
+
+  return mesh;
+}
+
+ProcessMeshProto ProcessMesh::to_proto() const {
+  ProcessMeshProto proto;
+
+  for (const auto &i : shape_) {
+    proto.add_shape(i);
+  }
+
+  for (const auto &i : process_ids_) {
+    proto.add_process_ids(i);
+  }
+
+  for (const auto &i : dim_names_) {
+    proto.add_dim_names(i);
+  }
+
+  return proto;
+}
+
+bool operator==(const ProcessMesh &lhs, const ProcessMesh &rhs) {
+  if (lhs.shape() != rhs.shape()) {
+    return false;
+  }
+  if (lhs.process_ids() != rhs.process_ids()) {
+    return false;
+  }
+  return true;
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/process_mesh.h b/paddle/fluid/distributed/auto_parallel/process_mesh.h
new file mode 100644
index 0000000000000..2652a8f606216
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/process_mesh.h
@@ -0,0 +1,94 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
+#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
+#include "paddle/fluid/distributed/auto_parallel/utils.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+class ProcessMesh {
+ public:
+  ProcessMesh() = default;
+
+  ProcessMesh(const std::vector<int64_t>& shape,
+              const std::vector<int64_t>& process_ids,
+              const std::vector<std::string>& dim_names);
+
+  const std::vector<int64_t>& shape() const { return shape_; }
+
+  const std::vector<int64_t>& process_ids() const { return process_ids_; }
+
+  const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+  int64_t size() const;
+
+  int64_t ndim() const { return shape_.size(); }
+
+  int64_t dim_size(int64_t dim) const {
+    int64_t cdim = canonical_dim(dim, shape_.size());
+    return shape_[cdim];
+  }
+
+  int64_t dim_size(const std::string& dim_name) const {
+    for (std::size_t i = 0; i < dim_names_.size(); ++i) {
+      if (dim_names_[i] == dim_name) {
+        return shape_[i];
+      }
+    }
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Cannot find the dimension of %s in this process mesh.", dim_name));
+  }
+
+  bool empty() const { return (shape_.empty() || process_ids_.empty()); }
+  bool contains(int64_t process_id) const;
+
+  // ProcessMesh from_string(const std::string& mesh_str);
+  std::string to_string() const;
+
+  static ProcessMesh from_proto(const ProcessMeshProto& proto);
+  ProcessMeshProto to_proto() const;
+
+ private:
+  std::vector<int64_t> shape_;
+  std::vector<int64_t> process_ids_;
+  std::vector<std::string> dim_names_;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const ProcessMesh& obj) {
+  os << obj.to_string();
+  return os;
+}
+
+bool operator==(const ProcessMesh& lhs, const ProcessMesh& rhs);
+
+inline bool operator!=(const ProcessMesh& lhs, const ProcessMesh& rhs) {
+  return !operator==(lhs, rhs);
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/process_mesh_test.cc b/paddle/fluid/distributed/auto_parallel/process_mesh_test.cc
new file mode 100644
index 0000000000000..d0f3c5b510b43
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/process_mesh_test.cc
@@ -0,0 +1,54 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
+#include <iostream>
+#include <sstream>
+#include "gtest/gtest.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+TEST(ProcessMesh, Ctor) {
+  std::vector<int64_t> shape = {2, 3};
+  std::vector<int64_t> process_ids = {0, 1, 2, 3, 4, 5};
+  std::vector<std::string> dim_names = {"x", "y"};
+  int64_t size = shape[0] * shape[1];
+  ProcessMesh process_mesh(shape, process_ids, dim_names);
+  EXPECT_EQ(process_mesh.shape(), shape);
+  EXPECT_EQ(process_mesh.process_ids(), process_ids);
+  EXPECT_EQ(process_mesh.dim_names()[0], "x");
+  EXPECT_EQ(process_mesh.dim_names()[1], "y");
+  EXPECT_EQ(process_mesh.size(), size);
+  EXPECT_EQ(process_mesh.ndim(), static_cast<int64_t>(shape.size()));
+  EXPECT_EQ(process_mesh.dim_size(0), shape[0]);
+  EXPECT_EQ(process_mesh.dim_size(-1), shape[1]);
+  EXPECT_EQ(process_mesh.dim_size("x"), shape[0]);
+  EXPECT_EQ(process_mesh.dim_size("y"), shape[1]);
+  EXPECT_EQ(process_mesh.empty(), false);
+  EXPECT_EQ(process_mesh.contains(0), true);
+  EXPECT_EQ(process_mesh.contains(6), false);
+  std::stringstream sstream;
+  sstream << process_mesh;
+  EXPECT_EQ(sstream.str(), process_mesh.to_string());
+  auto proto = process_mesh.to_proto();
+  ProcessMesh new_process_mesh = ProcessMesh::from_proto(proto);
+  EXPECT_EQ(process_mesh, new_process_mesh);
+  std::cout << new_process_mesh << std::endl;
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/utils.h b/paddle/fluid/distributed/auto_parallel/utils.h
new file mode 100644
index 0000000000000..106cbea5e2d6f
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/utils.h
@@ -0,0 +1,105 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+
+#include <algorithm>
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace distributed {
+namespace auto_parallel {
+
+// struct Indent {
+//   Indent(int &level) : level(level) { ++level; }
+//   ~Indent() { --level; }
+//   int &level;
+// };
+
+// inline std::string str_indent(std::string& str, cur_indent) {
+//   string spaces(cur_indent, " ");
+//   return str + std::string(cur_indent, " ");
+// }
+
+template <typename T>
+bool has_duplicates(const std::vector<T>& vec) {
+  std::unordered_map<T, int> map;
+  for (const auto& i : vec) {
+    ++map[i];
+    if (map[i] > 1) return true;
+  }
+  return false;
+}
+
+inline int64_t canonical_dim(int dim, int ndim) {
+  PADDLE_ENFORCE_EQ(
+      dim >= -ndim && dim < ndim,
+      true,
+      platform::errors::InvalidArgument(
+          "Dimension %d is outside of [-%d, %d).", dim, ndim, ndim));
+  if (dim < 0) {
+    return dim + ndim;
+  }
+  return dim;
+}
+
+// Refer to https://stackoverflow.com/a/5289170
+template <typename Range>
+std::string str_join(Range const& elements,
+                     const std::string& delimiter = ",") {
+  std::ostringstream os;
+  auto b = std::begin(elements), e = std::end(elements);
+
+  if (b != e) {
+    std::copy(b,
+              std::prev(e),
+              std::ostream_iterator<typename Range::value_type>(
+                  os, delimiter.c_str()));
+    b = std::prev(e);
+  }
+  if (b != e) {
+    os << *b;
+  }
+
+  return os.str();
+}
+
+// Refer to https://stackoverflow.com/a/46931770
+inline std::vector<std::string> str_split(std::string const& input,
+                                          const std::string& delimiter = ",") {
+  size_t pos_start = 0, pos_end, delim_len = delimiter.length();
+  std::string token;
+  std::vector<std::string> output;
+  while ((pos_end = input.find(delimiter, pos_start)) != std::string::npos) {
+    token = input.substr(pos_start, pos_end - pos_start);
+    pos_start = pos_end + delim_len;
+    output.push_back(token);
+  }
+  output.push_back(input.substr(pos_start));
+  return output;
+}
+
+// Refer to https://stackoverflow.com/a/29200671/2358969
+template <typename T>
+std::string to_string_with_precision(const T a_value, const int n = 2) {
+  std::ostringstream out;
+  out.precision(n);
+  out << std::fixed << a_value;
+  return out.str();
+}
+
+}  // namespace auto_parallel
+}  // namespace distributed
+}  // namespace paddle

From d9946c570701618f4e3b204c2eea81a419f532c8 Mon Sep 17 00:00:00 2001
From: Yulong Ao
Date: Sun, 7 Aug 2022 09:48:06 +0000
Subject: [PATCH 2/3] [Auto Parallel] Remove unnecessary code

---
 .../auto_parallel/auto_parallel.proto         |  85 ----------
 .../distributed/auto_parallel/dist_mapper.cc  | 146 ------------------
 .../distributed/auto_parallel/dist_mapper.h   |  73 ---------
 .../auto_parallel/dist_mapper_test.cc         |  72 ---------
 .../distributed/auto_parallel/process_mesh.cc | 134 ----------------
 .../distributed/auto_parallel/process_mesh.h  |  94 -----------
 .../auto_parallel/process_mesh_test.cc        |  54 -------
 7 files changed, 658 deletions(-)
 delete mode 100644 paddle/fluid/distributed/auto_parallel/dist_mapper.cc
 delete mode 100644 paddle/fluid/distributed/auto_parallel/dist_mapper.h
 delete mode 100644 paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc
 delete mode 100644 paddle/fluid/distributed/auto_parallel/process_mesh.cc
 delete mode 100644 paddle/fluid/distributed/auto_parallel/process_mesh.h
 delete mode 100644 paddle/fluid/distributed/auto_parallel/process_mesh_test.cc

diff --git a/paddle/fluid/distributed/auto_parallel/auto_parallel.proto b/paddle/fluid/distributed/auto_parallel/auto_parallel.proto
index eae33e929a2d3..5625737c4426a 100644
--- a/paddle/fluid/distributed/auto_parallel/auto_parallel.proto
+++ b/paddle/fluid/distributed/auto_parallel/auto_parallel.proto
b/paddle/fluid/distributed/auto_parallel/auto_parallel.proto @@ -16,73 +16,6 @@ syntax = "proto2"; package paddle.distributed.auto_parallel; -// ProcessMesh is used to organize processes and like n-dimension array. -message ProcessMeshProto { - // The size of each dimension. - repeated int64 shape = 1; - - // These process ids are stored by a row-major way. - // There are no duplicate process ids within one process mesh. - repeated int64 process_ids = 2; - - // The name of each dimension. - repeated string dim_names = 3; - -} - -// // This distributed attribute describes how to distribute the corresponding tensor, -// // and store any other information needed by auto parallel. -// message TensorDistAttrProto { -// // The process mesh where a tensor is distributed. -// optional ProcessMeshProto process_mesh = 1; -// -// // The length of dims_mapping is same as the length of the tensor shape. -// // The i-th dimension of the tensor will be sharded by the dims_mapping[i]-th dimension -// // of the above process mesh. If dims_mapping[i] is -1, the i-th dimension of the tensor -// // will not be sharded. For example, given a tensor shape [2, 6, 12], a process mesh -// // shape [2, 3] and a dims_mapping [-1, 1, 0], each sharded tensor will have a shape [2, 2, 6]. -// repeated int64 dims_mapping = 2; -// -// // The batch dimension of the corresponding tensor. -// optional int64 batch_dim = 3; -// -// // If the dynamic_dims[i] is True, the i-th dimension of the corresponding tensor -// // is dynamic changed. Otherwise, the i-th dimension of the tensor is static determined. -// repeated bool dynamic_dims = 4; -// } -// -// // This distributed attribute describes how to distribute the corresponding operator, -// // and store any other information needed by auto parallel. -// message OperatorDistAttrProto { -// message TensorDistAttrMappingEntryProto { -// optional string name = 1; -// optional TensorDistAttrProto tensor_dist_attr = 2; -// } -// // The key of this map is the input tensor name and the value is the distributed attribute -// // of the input tensor required by this corresponding operator. -// // The distributed attribute of the actual tensor may be not the same as that within -// // the distributed attribute of the operator. -// repeated TensorDistAttrMappingEntryProto input_dist_attrs = 1; -// -// // The key of this map is the output tensor name and the value is the distributed attribute -// // of the output tensor required by this corresponding operator. -// // The distributed attribute of the actual tensor may be not the same as that within -// // the distributed attribute of the operator. -// repeated TensorDistAttrMappingEntryProto output_dist_attrs = 2; -// -// // The process mesh where a op is distributed. -// optional ProcessMeshProto process_mesh = 3; -// -// // A operator ideally has a distributed operator which may have multiple distributed implementations. -// // This filed is usually same as the operator type. However, some operators such as the element-wise operators -// // may shared the same distributed operator, the field is use for this scenario. -// optional string impl_type = 4; -// -// // This field tells which distributed implementations of this corresponding operator -// // will be selected for the actual computation. -// optional int64 impl_idx = 5; -// } - // This proto describes the capability of one device such as the computation and memory. 
diff --git a/paddle/fluid/distributed/auto_parallel/dist_mapper.cc b/paddle/fluid/distributed/auto_parallel/dist_mapper.cc
deleted file mode 100644
index d0995604522e5..0000000000000
--- a/paddle/fluid/distributed/auto_parallel/dist_mapper.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <algorithm>
-
-#include "paddle/fluid/distributed/auto_parallel/dist_mapper.h"
-#include "paddle/fluid/distributed/auto_parallel/utils.h"
-
-namespace paddle {
-namespace distributed {
-namespace auto_parallel {
-
-void DistributedMapper::set_process_id_to_device_ids(
-    const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
-        process_id_to_device_ids) {
-  std::vector<std::string> device_mesh_names;
-  for (const auto& item : device_meshes_) {
-    device_mesh_names.push_back(item.first);
-  }
-  for (const auto& item : process_id_to_device_ids) {
-    PADDLE_ENFORCE_GE(
-        item.first,
-        0,
-        platform::errors::InvalidArgument(
-            "The process id %d must be greater than or equal to 0.",
-            item.first));
-    std::string device_mesh_name = item.second.first;
-    const std::vector<int64_t>& device_ids = item.second.second;
-    PADDLE_ENFORCE_EQ(
-        device_meshes_.count(device_mesh_name),
-        1,
-        platform::errors::InvalidArgument(
-            "Cannot find the device mesh %d in device_mesh ids [%s].",
-            device_mesh_name,
-            str_join(device_mesh_names)));
-    PADDLE_ENFORCE_EQ(
-        has_duplicates(device_ids),
-        false,
-        platform::errors::InvalidArgument(
-            "The mapped device ids [%s] of process_mesh %d must be unique.",
-            str_join(device_ids),
-            item.first));
-    const DeviceMesh& device_mesh = device_meshes_[device_mesh_name];
-    const std::vector<int64_t> cur_device_ids = device_mesh.device_ids();
-    for (int64_t device_id : device_ids) {
-      bool found =
-          std::find(cur_device_ids.begin(), cur_device_ids.end(), device_id) !=
-          cur_device_ids.end();
-      PADDLE_ENFORCE_EQ(
-          found,
-          true,
-          platform::errors::InvalidArgument(
-              "The device id %d cannot be find in the device mesh [%s].",
-              device_id,
-              str_join(cur_device_ids)));
-    }
-  }
-  process_id_to_device_ids_ = process_id_to_device_ids;
-}
-
-DistributedMapper DistributedMapper::from_proto(
-    const DistributedMapperProto& proto) {
-  DistributedMapper dist_mapper;
-  for (int64_t i = 0; i < proto.device_meshes_size(); ++i) {
-    dist_mapper.device_meshes_[proto.device_meshes(i).name()] =
-        DeviceMesh::from_proto(proto.device_meshes(i));
-  }
-  for (int64_t i = 0; i < proto.process_id_to_device_ids_size(); ++i) {
-    int64_t process_id = proto.process_id_to_device_ids(i).process_id();
-    std::string device_mesh_name =
-        proto.process_id_to_device_ids(i).device_mesh_name();
-    std::vector<int64_t> device_ids;
-    int64_t num_devices = proto.process_id_to_device_ids(i).device_ids_size();
-    for (int64_t j = 0; j < num_devices; ++j) {
-      device_ids.push_back(proto.process_id_to_device_ids(i).device_ids(j));
-    }
-    dist_mapper.process_id_to_device_ids_[process_id].first = device_mesh_name;
-    dist_mapper.process_id_to_device_ids_[process_id].second = device_ids;
-  }
-  return dist_mapper;
-}
-
-DistributedMapperProto DistributedMapper::to_proto() const {
-  DistributedMapperProto proto;
-  for (const auto& item : device_meshes_) {
-    proto.mutable_device_meshes()->Add()->CopyFrom(item.second.to_proto());
-  }
-  for (const auto& outer : process_id_to_device_ids_) {
-    auto proto_item = proto.mutable_process_id_to_device_ids()->Add();
-    proto_item->set_process_id(outer.first);
-    proto_item->set_device_mesh_name(outer.second.first);
-    for (const auto& inner : outer.second.second) {
-      proto_item->add_device_ids(inner);
-    }
-  }
-  return proto;
-}
-
-std::string DistributedMapper::to_string() const {
-  std::string mapper_str = "{device_meshes: [";
-  for (const auto& item : device_meshes_) {
-    mapper_str += item.second.to_string() + ", ";
-  }
-  mapper_str.replace(mapper_str.size() - 2, 2, "]");
-
-  mapper_str += "\nprocess_id_to_device_ids: [";
-  for (const auto& item : process_id_to_device_ids_) {
-    mapper_str += "{";
-    mapper_str +=
-        "process_id: " + std::to_string(item.first) + ", device_ids: [";
-    for (const auto& device_id : item.second.second) {
-      mapper_str +=
-          "{" + item.second.first + ", " + std::to_string(device_id) + "}, ";
-    }
-    mapper_str.replace(mapper_str.size() - 2, 2, "]");
-    mapper_str += "}, ";
-  }
-  mapper_str.replace(mapper_str.size() - 2, 2, "]");
-  mapper_str += "}";
-  return mapper_str;
-}
-
-bool operator==(const DistributedMapper& lhs, const DistributedMapper& rhs) {
-  if (lhs.device_meshes() != rhs.device_meshes()) {
-    return false;
-  }
-  if (lhs.process_id_to_device_ids() != rhs.process_id_to_device_ids()) {
-    return false;
-  }
-  return true;
-}
-
-}  // namespace auto_parallel
-}  // namespace distributed
-}  // namespace paddle
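The nested map validated by set_process_id_to_device_ids above is easier to read spelled out. A sketch of its shape; the mesh name and ids below are invented for illustration:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // process id -> (device mesh name, device ids on that mesh); values invented.
    std::map<int64_t, std::pair<std::string, std::vector<int64_t>>> p2d = {
        {0, {"mesh0", {0, 1}}},  // one process may drive several devices
        {1, {"mesh0", {1}}},     // and one device may be shared by processes
    };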
diff --git a/paddle/fluid/distributed/auto_parallel/dist_mapper.h b/paddle/fluid/distributed/auto_parallel/dist_mapper.h
deleted file mode 100644
index bd7f9790ad69f..0000000000000
--- a/paddle/fluid/distributed/auto_parallel/dist_mapper.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-
-#include <map>
-
-#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
-#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
-#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
-
-namespace paddle {
-namespace distributed {
-namespace auto_parallel {
-
-class DistributedMapper {
- public:
-  DistributedMapper() = default;
-
-  const std::map<std::string, DeviceMesh>& device_meshes() const {
-    return device_meshes_;
-  }
-
-  const DeviceMesh& device_mesh(const std::string& name) const {
-    return device_meshes_.at(name);
-  }
-
-  void add_device_mesh(const DeviceMesh& device_mesh) {
-    device_meshes_[device_mesh.name()] = device_mesh;
-  }
-
-  const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
-  process_id_to_device_ids() const {
-    return process_id_to_device_ids_;
-  }
-
-  void set_process_id_to_device_ids(
-      const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
-          process_id_to_device_ids);
-
-  // DistributedMapper from_string(const std::string& mapper_str);
-  std::string to_string() const;
-
-  static DistributedMapper from_proto(const DistributedMapperProto& proto);
-  DistributedMapperProto to_proto() const;
-
- private:
-  std::map<std::string, DeviceMesh> device_meshes_;
-  std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>
-      process_id_to_device_ids_;
-};
-
-bool operator==(const DistributedMapper& lhs, const DistributedMapper& rhs);
-
-inline std::ostream& operator<<(std::ostream& os,
-                                const DistributedMapper& obj) {
-  os << obj.to_string();
-  return os;
-}
-
-}  // namespace auto_parallel
-}  // namespace distributed
-}  // namespace paddle
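Condensed from the declaration above, the intended call sequence looks roughly like this. A sketch only: mesh stands for a DeviceMesh built elsewhere, and it is the same flow the deleted dist_mapper_test.cc below walks through:

    // Assumes dist_mapper.h and a DeviceMesh named `mesh` built elsewhere.
    DistributedMapper mapper;
    mapper.add_device_mesh(mesh);  // register a DeviceMesh under its name
    std::map<int64_t, std::pair<std::string, std::vector<int64_t>>> p2d;
    p2d[0] = {mesh.name(), {0}};   // process 0 runs on device 0 of that mesh
    mapper.set_process_id_to_device_ids(p2d);  // validated, see dist_mapper.cc
    auto proto = mapper.to_proto();                    // serialize
    auto copy = DistributedMapper::from_proto(proto);  // round-trip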
diff --git a/paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc b/paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc
deleted file mode 100644
index d427b9cbb09ed..0000000000000
--- a/paddle/fluid/distributed/auto_parallel/dist_mapper_test.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/distributed/auto_parallel/dist_mapper.h"
-#include <map>
-#include <sstream>
-#include "gtest/gtest.h"
-
-namespace paddle {
-namespace distributed {
-namespace auto_parallel {
-
-TEST(DistributedMapper, Ctor) {
-  std::vector<int64_t> shape = {2, 3};
-  std::vector<int64_t> device_ids = {0, 1, 2, 3, 4, 5};
-  std::vector<std::string> dim_names = {"x", "y"};
-  std::string device_type = "GPU";
-  int64_t size = shape[0] * shape[1];
-
-  DeviceMesh device_mesh("device_mesh", shape, device_ids, dim_names);
-  for (int64_t i = 0; i < shape[0]; ++i) {
-    for (int64_t j = 0; j < shape[1]; ++j) {
-      int64_t global_id = i * shape[1] + j;
-      int64_t local_id = j;
-      int64_t machine_id = i;
-      device_mesh.add_device(
-          Device(global_id, local_id, machine_id, device_type));
-    }
-  }
-  for (int64_t i = 0; i < size; ++i) {
-    for (int64_t j = 0; j < size; ++j) {
-      device_mesh.add_link(Link(i, j, "NVL"));
-    }
-  }
-
-  DistributedMapper dist_mapper;
-  dist_mapper.add_device_mesh(device_mesh);
-  std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>
-      process_id_to_device_ids;
-  process_id_to_device_ids[0] = {"device_mesh", {5}};
-  process_id_to_device_ids[1] = {"device_mesh", {4}};
-  process_id_to_device_ids[2] = {"device_mesh", {3}};
-  process_id_to_device_ids[3] = {"device_mesh", {2}};
-  process_id_to_device_ids[4] = {"device_mesh", {1}};
-  process_id_to_device_ids[5] = {"device_mesh", {0}};
-  dist_mapper.set_process_id_to_device_ids(process_id_to_device_ids);
-
-  EXPECT_EQ(dist_mapper.device_meshes().at("device_mesh"), device_mesh);
-  EXPECT_EQ(dist_mapper.device_mesh("device_mesh"), device_mesh);
-  EXPECT_EQ(dist_mapper.process_id_to_device_ids(), process_id_to_device_ids);
-  std::stringstream sstream;
-  sstream << dist_mapper;
-  EXPECT_EQ(sstream.str(), dist_mapper.to_string());
-  auto proto = dist_mapper.to_proto();
-  DistributedMapper new_dist_mapper = DistributedMapper::from_proto(proto);
-  EXPECT_EQ(dist_mapper, new_dist_mapper);
-}
-
-}  // namespace auto_parallel
-}  // namespace distributed
-}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/process_mesh.cc b/paddle/fluid/distributed/auto_parallel/process_mesh.cc
deleted file mode 100644
index dda2873768997..0000000000000
--- a/paddle/fluid/distributed/auto_parallel/process_mesh.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <algorithm>
-#include <iterator>
-
-#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
-#include "paddle/fluid/distributed/auto_parallel/utils.h"
-
-namespace paddle {
-namespace distributed {
-namespace auto_parallel {
-
-ProcessMesh::ProcessMesh(const std::vector<int64_t> &shape,
-                         const std::vector<int64_t> &process_ids,
-                         const std::vector<std::string> &dim_names) {
-  shape_ = shape;
-  int64_t size = this->size();
-  PADDLE_ENFORCE_EQ(
-      size,
-      process_ids.size(),
-      platform::errors::InvalidArgument("The size of this process mesh must be "
-                                        "equal to the size of its process ids.",
-                                        size,
-                                        process_ids.size()));
-  PADDLE_ENFORCE_EQ(
-      has_duplicates(process_ids),
-      false,
-      platform::errors::InvalidArgument("The process ids [%s] must be unique.",
-                                        str_join(process_ids_)));
-  process_ids_ = process_ids;
-
-  PADDLE_ENFORCE_EQ(shape_.size(),
-                    dim_names.size(),
-                    platform::errors::InvalidArgument(
-                        "The size of mesh shape must be equal to the size "
-                        "of the dimension names.",
-                        shape_.size(),
-                        dim_names_.size()));
-  PADDLE_ENFORCE_EQ(has_duplicates(dim_names),
-                    false,
-                    platform::errors::InvalidArgument(
-                        "The names [%s] of each dimension must be unique.",
-                        str_join(dim_names)));
-  dim_names_ = dim_names;
-}
-
-int64_t ProcessMesh::size() const {
-  if (shape_.empty()) return 0;
-  int64_t size = 1;
-  for (const int64_t dim_size : shape_) size *= dim_size;
-  return size;
-}
-
-bool ProcessMesh::contains(int64_t process_id) const {
-  auto result =
-      std::find(std::begin(process_ids_), std::end(process_ids_), process_id);
-  if (result != std::end(process_ids_)) {
-    return true;
-  } else {
-    return false;
-  }
-}
-
-std::string ProcessMesh::to_string() const {
-  std::string mesh_str = "{shape: [" + str_join(shape_) + "], ";
-  mesh_str += "process_ids: [" + str_join(process_ids_) + "], ";
-  mesh_str += "dim_names: [" + str_join(dim_names_) + "]}";
-  return mesh_str;
-}
-
-ProcessMesh ProcessMesh::from_proto(const ProcessMeshProto &proto) {
-  ProcessMesh mesh;
-
-  mesh.shape_.resize(proto.shape_size());
-  for (int64_t i = 0; i < proto.shape_size(); ++i) {
-    mesh.shape_[i] = proto.shape(i);
-  }
-
-  mesh.process_ids_.resize(proto.process_ids_size());
-  for (int64_t i = 0; i < proto.process_ids_size(); ++i) {
-    mesh.process_ids_[i] = proto.process_ids(i);
-  }
-
-  mesh.dim_names_.resize(proto.dim_names_size());
-  for (int64_t i = 0; i < proto.dim_names_size(); ++i) {
-    mesh.dim_names_[i] = proto.dim_names(i);
-  }
-
-  return mesh;
-}
-
-ProcessMeshProto ProcessMesh::to_proto() const {
-  ProcessMeshProto proto;
-
-  for (const auto &i : shape_) {
-    proto.add_shape(i);
-  }
-
-  for (const auto &i : process_ids_) {
-    proto.add_process_ids(i);
-  }
-
-  for (const auto &i : dim_names_) {
-    proto.add_dim_names(i);
-  }
-
-  return proto;
-}
-
-bool operator==(const ProcessMesh &lhs, const ProcessMesh &rhs) {
-  if (lhs.shape() != rhs.shape()) {
-    return false;
-  }
-  if (lhs.process_ids() != rhs.process_ids()) {
-    return false;
-  }
-  return true;
-}
-
-}  // namespace auto_parallel
-}  // namespace distributed
-}  // namespace paddle
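The "row-major" layout that both the proto comments and the ProcessMesh code above rely on comes down to one line of index arithmetic. A hypothetical helper, for illustration only:

    // Maps a mesh coordinate to its position in process_ids (row-major:
    // the last dimension varies fastest). Illustrative helper only.
    #include <cstdint>
    #include <vector>

    int64_t linear_index(const std::vector<int64_t>& shape,
                         const std::vector<int64_t>& coord) {
      int64_t idx = 0;
      for (std::size_t i = 0; i < shape.size(); ++i) {
        idx = idx * shape[i] + coord[i];
      }
      return idx;
    }

    // For shape {2, 3}, coord {1, 2} -> 1 * 3 + 2 = 5: the mesh used in the
    // tests here places process id 5 at coordinate (1, 2).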
diff --git a/paddle/fluid/distributed/auto_parallel/process_mesh.h b/paddle/fluid/distributed/auto_parallel/process_mesh.h
deleted file mode 100644
index 2652a8f606216..0000000000000
--- a/paddle/fluid/distributed/auto_parallel/process_mesh.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-#include <iostream>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
-#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
-#include "paddle/fluid/distributed/auto_parallel/utils.h"
-#include "paddle/fluid/platform/enforce.h"
-
-namespace paddle {
-namespace distributed {
-namespace auto_parallel {
-
-class ProcessMesh {
- public:
-  ProcessMesh() = default;
-
-  ProcessMesh(const std::vector<int64_t>& shape,
-              const std::vector<int64_t>& process_ids,
-              const std::vector<std::string>& dim_names);
-
-  const std::vector<int64_t>& shape() const { return shape_; }
-
-  const std::vector<int64_t>& process_ids() const { return process_ids_; }
-
-  const std::vector<std::string>& dim_names() const { return dim_names_; }
-
-  int64_t size() const;
-
-  int64_t ndim() const { return shape_.size(); }
-
-  int64_t dim_size(int64_t dim) const {
-    int64_t cdim = canonical_dim(dim, shape_.size());
-    return shape_[cdim];
-  }
-
-  int64_t dim_size(const std::string& dim_name) const {
-    for (std::size_t i = 0; i < dim_names_.size(); ++i) {
-      if (dim_names_[i] == dim_name) {
-        return shape_[i];
-      }
-    }
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "Cannot find the dimension of %s in this process mesh.", dim_name));
-  }
-
-  bool empty() const { return (shape_.empty() || process_ids_.empty()); }
-  bool contains(int64_t process_id) const;
-
-  // ProcessMesh from_string(const std::string& mesh_str);
-  std::string to_string() const;
-
-  static ProcessMesh from_proto(const ProcessMeshProto& proto);
-  ProcessMeshProto to_proto() const;
-
- private:
-  std::vector<int64_t> shape_;
-  std::vector<int64_t> process_ids_;
-  std::vector<std::string> dim_names_;
-};
-
-inline std::ostream& operator<<(std::ostream& os, const ProcessMesh& obj) {
-  os << obj.to_string();
-  return os;
-}
-
-bool operator==(const ProcessMesh& lhs, const ProcessMesh& rhs);
-
-inline bool operator!=(const ProcessMesh& lhs, const ProcessMesh& rhs) {
-  return !operator==(lhs, rhs);
-}
-
-}  // namespace auto_parallel
-}  // namespace distributed
-}  // namespace paddle
diff --git a/paddle/fluid/distributed/auto_parallel/process_mesh_test.cc b/paddle/fluid/distributed/auto_parallel/process_mesh_test.cc
deleted file mode 100644
index d0f3c5b510b43..0000000000000
--- a/paddle/fluid/distributed/auto_parallel/process_mesh_test.cc
+++ /dev/null
@@ -1,54 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
-#include <iostream>
-#include <sstream>
-#include "gtest/gtest.h"
-
-namespace paddle {
-namespace distributed {
-namespace auto_parallel {
-
-TEST(ProcessMesh, Ctor) {
-  std::vector<int64_t> shape = {2, 3};
-  std::vector<int64_t> process_ids = {0, 1, 2, 3, 4, 5};
-  std::vector<std::string> dim_names = {"x", "y"};
-  int64_t size = shape[0] * shape[1];
-  ProcessMesh process_mesh(shape, process_ids, dim_names);
-  EXPECT_EQ(process_mesh.shape(), shape);
-  EXPECT_EQ(process_mesh.process_ids(), process_ids);
-  EXPECT_EQ(process_mesh.dim_names()[0], "x");
-  EXPECT_EQ(process_mesh.dim_names()[1], "y");
-  EXPECT_EQ(process_mesh.size(), size);
-  EXPECT_EQ(process_mesh.ndim(), static_cast<int64_t>(shape.size()));
-  EXPECT_EQ(process_mesh.dim_size(0), shape[0]);
-  EXPECT_EQ(process_mesh.dim_size(-1), shape[1]);
-  EXPECT_EQ(process_mesh.dim_size("x"), shape[0]);
-  EXPECT_EQ(process_mesh.dim_size("y"), shape[1]);
-  EXPECT_EQ(process_mesh.empty(), false);
-  EXPECT_EQ(process_mesh.contains(0), true);
-  EXPECT_EQ(process_mesh.contains(6), false);
-  std::stringstream sstream;
-  sstream << process_mesh;
-  EXPECT_EQ(sstream.str(), process_mesh.to_string());
-  auto proto = process_mesh.to_proto();
-  ProcessMesh new_process_mesh = ProcessMesh::from_proto(proto);
-  EXPECT_EQ(process_mesh, new_process_mesh);
-  std::cout << new_process_mesh << std::endl;
-}
-
-}  // namespace auto_parallel
-}  // namespace distributed
-}  // namespace paddle

From 00b3aafad6c6623c04bfadc3d79810f6697d7036 Mon Sep 17 00:00:00 2001
From: Yulong Ao
Date: Sun, 7 Aug 2022 11:58:02 +0000
Subject: [PATCH 3/3] [Auto Parallel] Comment out unnecessary cmake statements

---
 .../distributed/auto_parallel/CMakeLists.txt  | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
index cfcc12515da91..192871c73c9a4 100644
--- a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
+++ b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
@@ -7,14 +7,14 @@ cc_test(
   SRCS device_mesh_test.cc
   DEPS device_mesh)
 
-cc_library(
-  process_mesh
-  SRCS process_mesh.cc
-  DEPS auto_parallel_proto)
-cc_test(
-  process_mesh_test
-  SRCS process_mesh_test.cc
-  DEPS process_mesh)
+# cc_library(
+#   process_mesh
+#   SRCS process_mesh.cc
+#   DEPS auto_parallel_proto)
+# cc_test(
+#   process_mesh_test
+#   SRCS process_mesh_test.cc
+#   DEPS process_mesh)
 
 # cc_library(
 #   dist_attr
@@ -25,14 +25,14 @@ cc_test(
 #   SRCS dist_attr_test.cc
 #   DEPS dist_attr)
 
-cc_library(
-  dist_mapper
-  SRCS dist_mapper.cc
-  DEPS device_mesh auto_parallel_proto)
-cc_test(
-  dist_mapper_test
-  SRCS dist_mapper_test.cc
-  DEPS dist_mapper)
+# cc_library(
+#   dist_mapper
+#   SRCS dist_mapper.cc
+#   DEPS device_mesh auto_parallel_proto)
+# cc_test(
+#   dist_mapper_test
+#   SRCS dist_mapper_test.cc
+#   DEPS dist_mapper)
 
 proto_library(auto_parallel_proto SRCS auto_parallel.proto)