diff --git a/runhouse/servers/env_servlet.py b/runhouse/servers/env_servlet.py index fc8e29fa4..941104dc3 100644 --- a/runhouse/servers/env_servlet.py +++ b/runhouse/servers/env_servlet.py @@ -1,4 +1,5 @@ import logging +import os import traceback from functools import wraps from typing import Any, Dict, Optional @@ -69,6 +70,20 @@ async def __init__(self, env_name: str, *args, **kwargs): setup_cluster_servlet=ClusterServletSetupOption.GET_OR_FAIL, ) + # Ray defaults to setting OMP_NUM_THREADS to 1, which unexpectedly limits parallelism in user programs. + # We clear it (set it to an empty string) by default, but if we find that the user explicitly set it to another value, we respect that. + # This is really only a factor if the user set the value inside the VM or container, or inside the base_env + # which a cluster was initialized with. If they set it inside the env constructor and the env was sent to the + # cluster normally with .to, it will be set after this point. + # TODO this had no effect when we did it below where we set CUDA_VISIBLE_DEVICES, so we may need to move that + # here and mirror the same behavior (setting it based on the detected gpus in the whole cluster may not work + # for multinode, but popping it may also break things, it needs to be tested). + num_threads = os.environ.get("OMP_NUM_THREADS") + if num_threads is not None and num_threads != "1": + os.environ["OMP_NUM_THREADS"] = num_threads + else: + os.environ["OMP_NUM_THREADS"] = "" + self.output_types = {} self.thread_ids = {} diff --git a/runhouse/servers/obj_store.py b/runhouse/servers/obj_store.py index f0cb81ef2..7e45cfacb 100644 --- a/runhouse/servers/obj_store.py +++ b/runhouse/servers/obj_store.py @@ -135,20 +135,6 @@ def __init__(self): self.installed_envs = {} # TODO: consider deleting it? self._kv_store: Dict[Any, Any] = None - # Ray defaults to setting OMP_NUM_THREADS to 1, which unexpectedly limit parallelism in user programs. 
- # We delete it by default, but if we find that the user explicitly set it to another value, we respect that. - # This is really only a factor if the user set the value inside the VM or container, or inside the base_env - # which a cluster was initialized with. If they set it inside the env constructor and the env was sent to the - # cluster normally with .to, it will be set after this point. - # TODO this had no effect when we did it below where we set CUDA_VISIBLE_DEVICES, so we may need to move that - # here and mirror the same behavior (setting it based on the detected gpus in the whole cluster may not work - # for multinode, but popping it may also break things, it needs to be tested). - num_threads = os.environ.get("OMP_NUM_THREADS") - if num_threads is not None and num_threads != "1": - os.environ["OMP_NUM_THREADS"] = num_threads - else: - os.environ["OMP_NUM_THREADS"] = "" - async def ainitialize( self, servlet_name: Optional[str] = None,