
Commit

Fix
co63oc committed Jan 11, 2024
1 parent f968050 commit 4b5855f
Showing 5 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/static/cluster.py
@@ -741,7 +741,7 @@ def _build_from_dict(self, cluster_info):
cluster_info.get("alpha_latency")
)
else:
- self._alpha_latecy = None
+ self._alpha_latency = None

def build_from_file(self, json_file_path):
with open(json_file_path) as json_file:
8 changes: 4 additions & 4 deletions python/paddle/distributed/auto_parallel/static/completion.py
@@ -1643,7 +1643,7 @@ def _is_grad_var_name(name):
def _get_forward_varname_from_grad_varname(grad_var_name):
assert _is_grad_var_name(
grad_var_name
), f"[{grad_var_name}] is not a grad varnme."
), f"[{grad_var_name}] is not a grad var name."
return grad_var_name[: grad_var_name.find("@GRAD")]

def _get_op_by_id(ops, id):
@@ -1769,7 +1769,7 @@ def _complete_grad_op_with_forward_op(forward_op, grad_op, vars):
def infer_backward_op_partial_status(
vars, grad_op, grad_op_dist_attr
):
- # NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for caculating parameter's gradient.
+ # NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for calculating parameter's gradient.
# Those implicit Reduction hinder the Partial inference in a normal way, and we need a special method to handle it.
param_grads = []
activation_grad = None
@@ -1993,7 +1993,7 @@ def infer_backward_op_partial_status(
output_name, ref_fwd_dims_mapping
)
# NOTE(zhaoyingli):
- # The sum op is used to accmulate the grads' value of the same forward var,
+ # The sum op is used to accumulate the grads' value of the same forward var,
# sum op's chunk_id is same with the last op which generate the grad.
ref_chunk_id = None
ref_process_mesh = None
@@ -2336,7 +2336,7 @@ def _init_global_mesh_for_program(self):
assert dist_op is not None
dist_op.dist_attr.process_mesh = ProcessMesh(world_ranks)

- # Find the most compatible implemenetations from the distributed operator
+ # Find the most compatible implementations from the distributed operator
op_dist_impls = find_compatible_distributed_operator_impls(
dist_op, fwd=True
)
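For context, the first completion.py hunk above only corrects the assertion message of a small name-mangling helper whose logic is fully visible in the diff. A minimal standalone sketch of that helper, with _is_grad_var_name approximated by a substring check (its body is not part of this diff) and a made-up variable name for illustration:

# Sketch of the helper touched by the first completion.py hunk.
# _is_grad_var_name is assumed here to test for the "@GRAD" suffix.
def _is_grad_var_name(name):
    return "@GRAD" in name

def _get_forward_varname_from_grad_varname(grad_var_name):
    assert _is_grad_var_name(
        grad_var_name
    ), f"[{grad_var_name}] is not a grad var name."
    return grad_var_name[: grad_var_name.find("@GRAD")]

print(_get_forward_varname_from_grad_varname("linear_0.w_0@GRAD"))  # linear_0.w_0
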
4 changes: 2 additions & 2 deletions python/paddle/distributed/auto_parallel/static/dist_tensor.py
@@ -112,8 +112,8 @@ def get_local_offsets(
global_sizes, dims_mapping, topology, processes, rank, shard_sizes
)
local_offsets = []
- rank_relatvie = processes.index(rank)
- coordinate = _linear_idx2coordinate(topology, rank_relatvie)
+ rank_relative = processes.index(rank)
+ coordinate = _linear_idx2coordinate(topology, rank_relative)

for i in range(len(global_sizes)):
if dims_mapping[i] == -1:
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/static/helper.py
@@ -36,7 +36,7 @@
class ProxyLayer(Layer):
"""
ProxyLayer implements all logic for converting dygraph model into
- static Program IR. Meanwhile, it provides conviential interfaces for
+ static Program IR. Meanwhile, it provides conventional interfaces for
auto parallel to visit feed/fetch/loss/metric variables.
"""

8 changes: 4 additions & 4 deletions python/paddle/distributed/auto_parallel/static/utils.py
@@ -297,8 +297,8 @@ def _get_comm_group(processes, shape, axis, rank):
assert (
rank in processes
), f"rank [{rank}] is NOT in processes group {processes}"
- rank_relatvie = processes.index(rank)
- coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+ rank_relative = processes.index(rank)
+ coordinate = _linear_idx2coordinate(shape, rank_relative)
coordinates_in_group = [coordinate[:] for i in range(shape[axis])]

# select comm group
@@ -328,8 +328,8 @@ def _get_idx_in_axis(processes, shape, axis, rank):

# NOTE _linear_idx2coordinate assume processes mesh start with 0 and continuous
# tricks to support processes mesh when it is not start with 0 or continuous
- rank_relatvie = processes.index(rank)
- coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+ rank_relative = processes.index(rank)
+ coordinate = _linear_idx2coordinate(shape, rank_relative)
return coordinate[axis]


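Three of the hunks above rename rank_relatvie to rank_relative around the same pattern: look up the rank's position in the (possibly non-contiguous) process list, then convert that linear index into mesh coordinates. A minimal sketch of the pattern, assuming _linear_idx2coordinate performs a row-major unravel (its implementation is not part of this diff); the example mesh and ranks are made up for illustration:

# Assumed row-major unravel of a linear index into mesh coordinates;
# the real _linear_idx2coordinate is not shown in this diff.
def _linear_idx2coordinate(shape, linear_idx):
    coordinate = []
    for size in reversed(shape):
        coordinate.append(linear_idx % size)
        linear_idx //= size
    return list(reversed(coordinate))

def _get_idx_in_axis(processes, shape, axis, rank):
    # processes may not start at 0 or be contiguous, so index into the
    # list first to get a mesh-relative rank, then unravel it.
    rank_relative = processes.index(rank)
    coordinate = _linear_idx2coordinate(shape, rank_relative)
    return coordinate[axis]

# A 2x3 mesh over ranks [4, 5, 6, 7, 8, 9]: rank 7 sits at coordinate [1, 0].
assert _get_idx_in_axis([4, 5, 6, 7, 8, 9], [2, 3], 0, 7) == 1
assert _get_idx_in_axis([4, 5, 6, 7, 8, 9], [2, 3], 1, 7) == 0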
