Skip to content

Commit

Permalink
Update test_RN50_external_source_parallel_train_ddp.py to work with the latest PyTorch
Browse files Browse the repository at this point in the history

- updates the invocation of the test_RN50_external_source_parallel_train_ddp.py test
  to use torchrun and to propagate the local rank through the LOCAL_RANK environment variable

Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com>
  • Loading branch information
JanuszL committed Dec 4, 2023
1 parent d18879c commit 31ca1f3
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
16 changes: 8 additions & 8 deletions dali/test/python/test_RN50_external_source_parallel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,17 +396,17 @@ def parse_test_arguments(supports_distributed):
"with tensor.gpu()",
)

if supports_distributed:
parser.add_argument(
"--local_rank",
default=0,
type=int,
help="Id of the local rank in distributed scenario.",
)
else:
if not supports_distributed:
parser.add_argument("-g", "--gpus", default=1, type=int, metavar="N", help="number of GPUs")
args = parser.parse_args()

if 'WORLD_SIZE' in os.environ:
args.distributed = int(os.environ['WORLD_SIZE']) > 1
if 'LOCAL_RANK' in os.environ:
args.local_rank = int(os.environ['LOCAL_RANK'])
else:
args.local_rank = 0

if supports_distributed:
print(
"GPU ID: {}, batch: {}, epochs: {}, workers: {}, py_workers: {}, prefetch depth: {}, "
Expand Down
8 changes: 4 additions & 4 deletions qa/TL2_FW_iterators_perf/test_pytorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ test_body() {
python test_RN50_data_fw_iterators.py --framework ${fw} --gpus ${NUM_GPUS} -b 13 \
--workers 3 --prefetch 2 --epochs 3
done
python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --worker_init fork --benchmark_iters 500 --test_pipes parallel
python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --worker_init spawn --benchmark_iters 500 --test_pipes parallel
python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --benchmark_iters 500 --test_pipes file_reader
python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --benchmark_iters 250 --test_pipes scalar
torchrun --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --worker_init fork --benchmark_iters 500 --test_pipes parallel
torchrun --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --worker_init spawn --benchmark_iters 500 --test_pipes parallel
torchrun --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --benchmark_iters 500 --test_pipes file_reader
torchrun --nproc_per_node=${NUM_GPUS} ./test_RN50_external_source_parallel_train_ddp.py /data/imagenet/train-jpeg/ --workers 6 --py_workers 6 --epochs 3 --batch_size 256 --reader_queue_depth 2 --benchmark_iters 250 --test_pipes scalar
}

pushd ../..
Expand Down

0 comments on commit 31ca1f3

Please sign in to comment.