
Fix typo: rank_relatvie → rank_relative #60770

Merged
merged 1 commit on Jan 12, 2024
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/static/cluster.py
@@ -741,7 +741,7 @@ def _build_from_dict(self, cluster_info):
cluster_info.get("alpha_latency")
)
else:
-self._alpha_latecy = None
+self._alpha_latency = None

def build_from_file(self, json_file_path):
with open(json_file_path) as json_file:
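A brief note on why this one-character fix matters: assigning to a misspelled attribute in the else branch silently creates a separate attribute, so a later read of self._alpha_latency on that code path fails. A minimal sketch with a hypothetical stand-in class (not the real Cluster API):

class ClusterSketch:
    """Hypothetical stand-in for the real Cluster class."""

    def _build_from_dict(self, cluster_info):
        if cluster_info.get("alpha_latency"):
            self._alpha_latency = cluster_info["alpha_latency"]
        else:
            # Before the fix this line wrote `self._alpha_latecy`,
            # leaving `_alpha_latency` undefined on this branch.
            self._alpha_latency = None


cluster = ClusterSketch()
cluster._build_from_dict({})   # takes the else branch
print(cluster._alpha_latency)  # None; with the typo this read would raise AttributeError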
8 changes: 4 additions & 4 deletions python/paddle/distributed/auto_parallel/static/completion.py
@@ -1643,7 +1643,7 @@ def _is_grad_var_name(name):
def _get_forward_varname_from_grad_varname(grad_var_name):
assert _is_grad_var_name(
grad_var_name
-), f"[{grad_var_name}] is not a grad varnme."
+), f"[{grad_var_name}] is not a grad var name."
return grad_var_name[: grad_var_name.find("@GRAD")]

def _get_op_by_id(ops, id):
@@ -1769,7 +1769,7 @@ def _complete_grad_op_with_forward_op(forward_op, grad_op, vars):
def infer_backward_op_partial_status(
vars, grad_op, grad_op_dist_attr
):
-# NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for caculating parameter's gradient.
+# NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for calculating parameter's gradient.
# Those implicit Reduction hinder the Partial inference in a normal way, and we need a special method to handle it.
param_grads = []
activation_grad = None
@@ -1993,7 +1993,7 @@ def infer_backward_op_partial_status(
output_name, ref_fwd_dims_mapping
)
# NOTE(zhaoyingli):
-# The sum op is used to accmulate the grads' value of the same forward var,
+# The sum op is used to accumulate the grads' value of the same forward var,
# sum op's chunk_id is same with the last op which generate the grad.
ref_chunk_id = None
ref_process_mesh = None
@@ -2336,7 +2336,7 @@ def _init_global_mesh_for_program(self):
assert dist_op is not None
dist_op.dist_attr.process_mesh = ProcessMesh(world_ranks)

-# Find the most compatible implemenetations from the distributed operator
+# Find the most compatible implementations from the distributed operator
op_dist_impls = find_compatible_distributed_operator_impls(
dist_op, fwd=True
)
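For context, a standalone sketch of the helper touched in the first hunk above. The @GRAD suffix handling is copied from the code shown in the diff; the body of _is_grad_var_name is not part of this diff, so the version here is an assumption:

def _is_grad_var_name(name):
    # Assumption: gradient variables are identified by the "@GRAD" marker.
    return "@GRAD" in name


def _get_forward_varname_from_grad_varname(grad_var_name):
    assert _is_grad_var_name(
        grad_var_name
    ), f"[{grad_var_name}] is not a grad var name."
    return grad_var_name[: grad_var_name.find("@GRAD")]


print(_get_forward_varname_from_grad_varname("linear_0.w_0@GRAD"))  # linear_0.w_0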
4 changes: 2 additions & 2 deletions python/paddle/distributed/auto_parallel/static/dist_tensor.py
@@ -112,8 +112,8 @@ def get_local_offsets(
global_sizes, dims_mapping, topology, processes, rank, shard_sizes
)
local_offsets = []
-rank_relatvie = processes.index(rank)
-coordinate = _linear_idx2coordinate(topology, rank_relatvie)
+rank_relative = processes.index(rank)
+coordinate = _linear_idx2coordinate(topology, rank_relative)

for i in range(len(global_sizes)):
if dims_mapping[i] == -1:
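The renamed variable is simply the rank's position inside the process list; _linear_idx2coordinate (not shown in this diff) then turns that flat index into a mesh coordinate. A minimal sketch, assuming a row-major layout in the spirit of numpy.unravel_index:

def _linear_idx2coordinate_sketch(topology, linear_idx):
    # Hypothetical stand-in: row-major flat index -> coordinate in `topology`.
    coordinate = []
    for dim_size in reversed(topology):
        coordinate.append(linear_idx % dim_size)
        linear_idx //= dim_size
    return list(reversed(coordinate))


processes = [4, 5, 6, 7]   # process mesh that does not start at rank 0
topology = [2, 2]
rank = 6

rank_relative = processes.index(rank)                          # 2
print(_linear_idx2coordinate_sketch(topology, rank_relative))  # [1, 0]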
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/static/helper.py
@@ -36,7 +36,7 @@
class ProxyLayer(Layer):
"""
ProxyLayer implements all logic for converting dygraph model into
-static Program IR. Meanwhile, it provides conviential interfaces for
+static Program IR. Meanwhile, it provides conventional interfaces for
auto parallel to visit feed/fetch/loss/metric variables.
"""

8 changes: 4 additions & 4 deletions python/paddle/distributed/auto_parallel/static/utils.py
@@ -297,8 +297,8 @@ def _get_comm_group(processes, shape, axis, rank):
assert (
rank in processes
), f"rank [{rank}] is NOT in processes group {processes}"
-rank_relatvie = processes.index(rank)
-coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+rank_relative = processes.index(rank)
+coordinate = _linear_idx2coordinate(shape, rank_relative)
coordinates_in_group = [coordinate[:] for i in range(shape[axis])]

# select comm group
@@ -328,8 +328,8 @@ def _get_idx_in_axis(processes, shape, axis, rank):

# NOTE _linear_idx2coordinate assume processes mesh start with 0 and continuous
# tricks to support processes mesh when it is not start with 0 or continuous
-rank_relatvie = processes.index(rank)
-coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+rank_relative = processes.index(rank)
+coordinate = _linear_idx2coordinate(shape, rank_relative)
return coordinate[axis]


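Tying this to the NOTE in the second hunk: processes.index(rank) maps the real rank back to a zero-based, contiguous index, so the coordinate lookup still works when the process mesh does not start at 0. A small self-contained example for a 2-D mesh, again assuming row-major ordering:

processes = list(range(8, 16))   # ranks 8..15 arranged as a 2 x 4 mesh
shape = [2, 4]
rank = 14

rank_relative = processes.index(rank)        # 6
row, col = divmod(rank_relative, shape[1])   # row-major unravel for a 2-D mesh
print([row, col])                            # [1, 2]; coordinate[axis] then picks the index along one axis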