Fix a bug in DPO
huxinye committed Sep 18, 2024
1 parent c4ef7da commit cc28617
Showing 3 changed files with 6 additions and 4 deletions.
4 changes: 4 additions & 0 deletions llm/alignment/dpo/dpo_argument.py
@@ -95,3 +95,7 @@ class DPOModelArgument:
         default=False,
         metadata={"help": "whether to use sequence parallel"},
     )
+    tensor_parallel_output: Optional[bool] = field(
+        default=True,
+        metadata={"help": "whether to output logits in distributed status"},
+    )
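The new field makes the previously hardcoded behavior user-configurable. As a minimal, self-contained sketch of how a dataclass field like this typically surfaces as a boolean command-line flag (plain argparse is used here purely for illustration; the repository presumably parses these dataclasses with PaddleNLP's own HuggingFace-style argument parser):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--tensor_parallel_output",
        type=lambda s: s.lower() in ("true", "1"),
        default=True,
        help="whether to output logits in distributed status",
    )
    # e.g. disable sharded logits from the command line
    args = parser.parse_args(["--tensor_parallel_output", "false"])
    print(args.tensor_parallel_output)  # False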
2 changes: 1 addition & 1 deletion llm/alignment/dpo/run_dpo.py
@@ -116,7 +116,7 @@ def main():
         tensor_parallel_rank=training_args.tensor_parallel_rank,
         recompute_granularity=model_args.recompute_granularity,
         use_flash_attention=model_args.use_flash_attention,
-        tensor_parallel_output=True,
+        tensor_parallel_output=model_args.tensor_parallel_output,
     )
     if training_args.pipeline_parallel_degree > 1:
         raise ValueError("DPO does not support pipeline parallelism yet.")
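With the hardcoded True replaced by the argument, the flag now flows from the command line into the model config. A hedged sketch of the effect (the enclosing call is not shown in the hunk, so the helper below is hypothetical, not repository code):

    from dataclasses import dataclass

    @dataclass
    class DPOModelArgument:
        tensor_parallel_output: bool = True  # new field from dpo_argument.py

    def build_config_kwargs(model_args: DPOModelArgument) -> dict:
        # Before this commit the value was hardcoded: {"tensor_parallel_output": True}.
        # Now the user-supplied argument is forwarded instead.
        return {"tensor_parallel_output": model_args.tensor_parallel_output}

    print(build_config_kwargs(DPOModelArgument(tensor_parallel_output=False)))
    # {'tensor_parallel_output': False}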
4 changes: 1 addition & 3 deletions paddlenlp/transformers/gemma/modeling.py
@@ -1558,9 +1558,7 @@ def forward(

         # if labels is None, it means we need the full output instead of tensor_parallel_output
         # tensor_parallel_output goes together with ParallelCrossEntropy
-        tensor_parallel_output = (
-            self.config.tensor_parallel_output and labels is not None and self.config.tensor_parallel_degree > 1
-        )
+        tensor_parallel_output = self.config.tensor_parallel_output and self.config.tensor_parallel_degree > 1

         logits = self.lm_head(hidden_states, tensor_parallel_output=tensor_parallel_output)
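This is the substantive fix: the old condition forced full (gathered) logits whenever labels was None, which overrode the config flag, presumably a problem for pipelines like DPO that compute their loss from logits outside forward(). A minimal sketch of the before/after gating (config attribute names mirror the diff; the helper functions are illustrative, not repository code):

    from types import SimpleNamespace

    def sharded_logits_old(config, labels) -> bool:
        # pre-commit behavior: labels had to be present to keep logits sharded
        return bool(
            config.tensor_parallel_output and labels is not None and config.tensor_parallel_degree > 1
        )

    def sharded_logits_new(config, labels=None) -> bool:
        # post-commit behavior: the config flag and parallel degree alone decide
        return bool(config.tensor_parallel_output and config.tensor_parallel_degree > 1)

    cfg = SimpleNamespace(tensor_parallel_output=True, tensor_parallel_degree=2)
    print(sharded_logits_old(cfg, labels=None))  # False: logits were always gathered
    print(sharded_logits_new(cfg))               # True: stays sharded for ParallelCrossEntropy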
