From 4b5855f541c931b9885c6de2b8644dcf77083c7d Mon Sep 17 00:00:00 2001
From: co63oc
Date: Fri, 12 Jan 2024 07:56:21 +0800
Subject: [PATCH] Fix

---
 python/paddle/distributed/auto_parallel/static/cluster.py | 2 +-
 .../paddle/distributed/auto_parallel/static/completion.py | 8 ++++----
 .../distributed/auto_parallel/static/dist_tensor.py       | 4 ++--
 python/paddle/distributed/auto_parallel/static/helper.py  | 2 +-
 python/paddle/distributed/auto_parallel/static/utils.py   | 8 ++++----
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/python/paddle/distributed/auto_parallel/static/cluster.py b/python/paddle/distributed/auto_parallel/static/cluster.py
index e5eb7b25002e6..da1d6eed20c78 100644
--- a/python/paddle/distributed/auto_parallel/static/cluster.py
+++ b/python/paddle/distributed/auto_parallel/static/cluster.py
@@ -741,7 +741,7 @@ def _build_from_dict(self, cluster_info):
                 cluster_info.get("alpha_latency")
             )
         else:
-            self._alpha_latecy = None
+            self._alpha_latency = None
 
     def build_from_file(self, json_file_path):
         with open(json_file_path) as json_file:
diff --git a/python/paddle/distributed/auto_parallel/static/completion.py b/python/paddle/distributed/auto_parallel/static/completion.py
index 692d02b7563c6..88731d6fa6096 100644
--- a/python/paddle/distributed/auto_parallel/static/completion.py
+++ b/python/paddle/distributed/auto_parallel/static/completion.py
@@ -1643,7 +1643,7 @@ def _is_grad_var_name(name):
         def _get_forward_varname_from_grad_varname(grad_var_name):
             assert _is_grad_var_name(
                 grad_var_name
-            ), f"[{grad_var_name}] is not a grad varnme."
+            ), f"[{grad_var_name}] is not a grad var name."
             return grad_var_name[: grad_var_name.find("@GRAD")]
 
         def _get_op_by_id(ops, id):
@@ -1769,7 +1769,7 @@ def _complete_grad_op_with_forward_op(forward_op, grad_op, vars):
             def infer_backward_op_partial_status(
                 vars, grad_op, grad_op_dist_attr
             ):
-                # NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for caculating parameter's gradient.
+                # NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for calculating parameter's gradient.
                 # Those implicit Reduction hinder the Partial inference in a normal way, and we need a special method to handle it.
                 param_grads = []
                 activation_grad = None
@@ -1993,7 +1993,7 @@ def infer_backward_op_partial_status(
                         output_name, ref_fwd_dims_mapping
                     )
                     # NOTE(zhaoyingli):
-                    # The sum op is used to accmulate the grads' value of the same forward var,
+                    # The sum op is used to accumulate the grads' value of the same forward var,
                     # sum op's chunk_id is same with the last op which generate the grad.
                     ref_chunk_id = None
                     ref_process_mesh = None
@@ -2336,7 +2336,7 @@ def _init_global_mesh_for_program(self):
                 assert dist_op is not None
                 dist_op.dist_attr.process_mesh = ProcessMesh(world_ranks)
 
-                # Find the most compatible implemenetations from the distributed operator
+                # Find the most compatible implementations from the distributed operator
                 op_dist_impls = find_compatible_distributed_operator_impls(
                     dist_op, fwd=True
                 )
diff --git a/python/paddle/distributed/auto_parallel/static/dist_tensor.py b/python/paddle/distributed/auto_parallel/static/dist_tensor.py
index 99f3b845836a4..b15218d47426b 100644
--- a/python/paddle/distributed/auto_parallel/static/dist_tensor.py
+++ b/python/paddle/distributed/auto_parallel/static/dist_tensor.py
@@ -112,8 +112,8 @@ def get_local_offsets(
             global_sizes, dims_mapping, topology, processes, rank, shard_sizes
         )
         local_offsets = []
-        rank_relatvie = processes.index(rank)
-        coordinate = _linear_idx2coordinate(topology, rank_relatvie)
+        rank_relative = processes.index(rank)
+        coordinate = _linear_idx2coordinate(topology, rank_relative)
 
         for i in range(len(global_sizes)):
             if dims_mapping[i] == -1:
diff --git a/python/paddle/distributed/auto_parallel/static/helper.py b/python/paddle/distributed/auto_parallel/static/helper.py
index 600e9821ca114..8dcb2563336f8 100644
--- a/python/paddle/distributed/auto_parallel/static/helper.py
+++ b/python/paddle/distributed/auto_parallel/static/helper.py
@@ -36,7 +36,7 @@ class ProxyLayer(Layer):
     """
     ProxyLayer implements all logic for converting dygraph model into
-    static Program IR. Meanwhile, it provides conviential interfaces for
+    static Program IR. Meanwhile, it provides conventional interfaces for
    auto parallel to visit feed/fetch/loss/metric variables.
     """
diff --git a/python/paddle/distributed/auto_parallel/static/utils.py b/python/paddle/distributed/auto_parallel/static/utils.py
index 359767c7345e8..fdc8b2a28ed47 100644
--- a/python/paddle/distributed/auto_parallel/static/utils.py
+++ b/python/paddle/distributed/auto_parallel/static/utils.py
@@ -297,8 +297,8 @@ def _get_comm_group(processes, shape, axis, rank):
     assert (
         rank in processes
     ), f"rank [{rank}] is NOT in processes group {processes}"
-    rank_relatvie = processes.index(rank)
-    coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+    rank_relative = processes.index(rank)
+    coordinate = _linear_idx2coordinate(shape, rank_relative)
     coordinates_in_group = [coordinate[:] for i in range(shape[axis])]
 
     # select comm group
@@ -328,8 +328,8 @@ def _get_idx_in_axis(processes, shape, axis, rank):
 
     # NOTE _linear_idx2coordinate assume processes mesh start with 0 and continuous
     # tricks to support processes mesh when it is not start with 0 or continuous
-    rank_relatvie = processes.index(rank)
-    coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+    rank_relative = processes.index(rank)
+    coordinate = _linear_idx2coordinate(shape, rank_relative)
     return coordinate[axis]