
Fix typo: rank_relatvie → rank_relative #60770

Merged
merged 1 commit on Jan 12, 2024
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/static/cluster.py
@@ -741,7 +741,7 @@ def _build_from_dict(self, cluster_info):
cluster_info.get("alpha_latency")
)
else:
-self._alpha_latecy = None
+self._alpha_latency = None

def build_from_file(self, json_file_path):
with open(json_file_path) as json_file:
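A brief note on why this one-character fix matters: assigning to a misspelled attribute in the else branch silently creates a separate attribute, so a later read of self._alpha_latency on that code path fails. A minimal sketch with a hypothetical stand-in class (not the real Cluster API):

class ClusterSketch:
    """Hypothetical stand-in for the real Cluster class."""

    def _build_from_dict(self, cluster_info):
        if cluster_info.get("alpha_latency"):
            self._alpha_latency = cluster_info["alpha_latency"]
        else:
            # Before the fix this line wrote `self._alpha_latecy`,
            # leaving `_alpha_latency` undefined on this branch.
            self._alpha_latency = None


cluster = ClusterSketch()
cluster._build_from_dict({})   # takes the else branch
print(cluster._alpha_latency)  # None; with the typo this read would raise AttributeError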
8 changes: 4 additions & 4 deletions python/paddle/distributed/auto_parallel/static/completion.py
@@ -1643,7 +1643,7 @@ def _is_grad_var_name(name):
def _get_forward_varname_from_grad_varname(grad_var_name):
assert _is_grad_var_name(
grad_var_name
-), f"[{grad_var_name}] is not a grad varnme."
+), f"[{grad_var_name}] is not a grad var name."
return grad_var_name[: grad_var_name.find("@GRAD")]

def _get_op_by_id(ops, id):
@@ -1769,7 +1769,7 @@ def _complete_grad_op_with_forward_op(forward_op, grad_op, vars):
def infer_backward_op_partial_status(
vars, grad_op, grad_op_dist_attr
):
-# NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for caculating parameter's gradient.
+# NOTE Since we use composite op in static mode which might have implicit Reduction of broadcast axes for calculating parameter's gradient.
# Those implicit Reduction hinder the Partial inference in a normal way, and we need a special method to handle it.
param_grads = []
activation_grad = None
@@ -1993,7 +1993,7 @@ def infer_backward_op_partial_status(
output_name, ref_fwd_dims_mapping
)
# NOTE(zhaoyingli):
-# The sum op is used to accmulate the grads' value of the same forward var,
+# The sum op is used to accumulate the grads' value of the same forward var,
# sum op's chunk_id is same with the last op which generate the grad.
ref_chunk_id = None
ref_process_mesh = None
@@ -2336,7 +2336,7 @@ def _init_global_mesh_for_program(self):
assert dist_op is not None
dist_op.dist_attr.process_mesh = ProcessMesh(world_ranks)

-# Find the most compatible implemenetations from the distributed operator
+# Find the most compatible implementations from the distributed operator
op_dist_impls = find_compatible_distributed_operator_impls(
dist_op, fwd=True
)
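For context, a standalone sketch of the helper touched in the first hunk above. The @GRAD suffix handling is copied from the code shown in the diff; the body of _is_grad_var_name is not part of this diff, so the version here is an assumption:

def _is_grad_var_name(name):
    # Assumption: gradient variables are identified by the "@GRAD" marker.
    return "@GRAD" in name


def _get_forward_varname_from_grad_varname(grad_var_name):
    assert _is_grad_var_name(
        grad_var_name
    ), f"[{grad_var_name}] is not a grad var name."
    return grad_var_name[: grad_var_name.find("@GRAD")]


print(_get_forward_varname_from_grad_varname("linear_0.w_0@GRAD"))  # linear_0.w_0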
4 changes: 2 additions & 2 deletions python/paddle/distributed/auto_parallel/static/dist_tensor.py
@@ -112,8 +112,8 @@ def get_local_offsets(
global_sizes, dims_mapping, topology, processes, rank, shard_sizes
)
local_offsets = []
-rank_relatvie = processes.index(rank)
-coordinate = _linear_idx2coordinate(topology, rank_relatvie)
+rank_relative = processes.index(rank)
+coordinate = _linear_idx2coordinate(topology, rank_relative)

for i in range(len(global_sizes)):
if dims_mapping[i] == -1:
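The renamed variable is simply the rank's position inside the process list; _linear_idx2coordinate (not shown in this diff) then turns that flat index into a mesh coordinate. A minimal sketch, assuming a row-major layout in the spirit of numpy.unravel_index:

def _linear_idx2coordinate_sketch(topology, linear_idx):
    # Hypothetical stand-in: row-major flat index -> coordinate in `topology`.
    coordinate = []
    for dim_size in reversed(topology):
        coordinate.append(linear_idx % dim_size)
        linear_idx //= dim_size
    return list(reversed(coordinate))


processes = [4, 5, 6, 7]   # process mesh that does not start at rank 0
topology = [2, 2]
rank = 6

rank_relative = processes.index(rank)                          # 2
print(_linear_idx2coordinate_sketch(topology, rank_relative))  # [1, 0]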
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/static/helper.py
@@ -36,7 +36,7 @@
class ProxyLayer(Layer):
"""
ProxyLayer implements all logic for converting dygraph model into
-static Program IR. Meanwhile, it provides conviential interfaces for
+static Program IR. Meanwhile, it provides conventional interfaces for
auto parallel to visit feed/fetch/loss/metric variables.
"""

8 changes: 4 additions & 4 deletions python/paddle/distributed/auto_parallel/static/utils.py
@@ -297,8 +297,8 @@ def _get_comm_group(processes, shape, axis, rank):
assert (
rank in processes
), f"rank [{rank}] is NOT in processes group {processes}"
-rank_relatvie = processes.index(rank)
-coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+rank_relative = processes.index(rank)
+coordinate = _linear_idx2coordinate(shape, rank_relative)
coordinates_in_group = [coordinate[:] for i in range(shape[axis])]

# select comm group
@@ -328,8 +328,8 @@ def _get_idx_in_axis(processes, shape, axis, rank):

# NOTE _linear_idx2coordinate assume processes mesh start with 0 and continuous
# tricks to support processes mesh when it is not start with 0 or continuous
-rank_relatvie = processes.index(rank)
-coordinate = _linear_idx2coordinate(shape, rank_relatvie)
+rank_relative = processes.index(rank)
+coordinate = _linear_idx2coordinate(shape, rank_relative)
return coordinate[axis]


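Tying this to the NOTE in the second hunk: processes.index(rank) maps the real rank back to a zero-based, contiguous index, so the coordinate lookup still works when the process mesh does not start at 0. A small self-contained example for a 2-D mesh, again assuming row-major ordering:

processes = list(range(8, 16))   # ranks 8..15 arranged as a 2 x 4 mesh
shape = [2, 4]
rank = 14

rank_relative = processes.index(rank)        # 6
row, col = divmod(rank_relative, shape[1])   # row-major unravel for a 2-D mesh
print([row, col])                            # [1, 2]; coordinate[axis] then picks the index along one axis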