From bfbe9013a0d63e3239682803d5e603a998ef25b4 Mon Sep 17 00:00:00 2001 From: Caroline Chen Date: Wed, 20 Mar 2024 15:23:43 -0400 Subject: [PATCH] Deprecate system arg from function/module factory functions (#625) --- runhouse/resources/functionals/mapper.py | 2 +- .../resources/functions/function_factory.py | 18 ++++++------- runhouse/resources/module.py | 25 +++++++++---------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/runhouse/resources/functionals/mapper.py b/runhouse/resources/functionals/mapper.py index 7cac1c73e..0d7f6d8d8 100644 --- a/runhouse/resources/functionals/mapper.py +++ b/runhouse/resources/functionals/mapper.py @@ -233,7 +233,7 @@ def mapper( >>> remote_fn = rh.function(local_fn).to(cluster) >>> mapper = rh.mapper(remote_fn, replicas=2) - >>> remote_module = rh.module(cls=MyClass, system=cluster, env="my_env") + >>> remote_module = rh.module(cls=MyClass).to(system=cluster, env="my_env") >>> mapper = rh.mapper(remote_module, method=my_class_method, replicas=-1) """ diff --git a/runhouse/resources/functions/function_factory.py b/runhouse/resources/functions/function_factory.py index 71614d050..055e82e2a 100644 --- a/runhouse/resources/functions/function_factory.py +++ b/runhouse/resources/functions/function_factory.py @@ -5,7 +5,7 @@ from runhouse.resources.envs import _get_env_from, Env from runhouse.resources.functions.function import Function -from runhouse.resources.hardware import _get_cluster_from, Cluster +from runhouse.resources.hardware import Cluster from runhouse.resources.packages import git_package logger = logging.getLogger(__name__) @@ -14,7 +14,7 @@ def function( fn: Optional[Union[str, Callable]] = None, name: Optional[str] = None, - system: Optional[Union[str, Cluster]] = None, + system: Optional[Union[str, Cluster]] = None, # deprecated env: Optional[Union[List[str], Env, str]] = None, dryrun: bool = False, load_secrets: bool = False, @@ -28,8 +28,6 @@ def function( fn (Optional[str or Callable]): The function to execute on the remote system when the function is called. name (Optional[str]): Name of the Function to create or retrieve. This can be either from a local config or from the RNS. - system (Optional[str or Cluster]): Hardware (cluster) on which to execute the Function. - This can be either the string name of a Cluster object, or a Cluster object. env (Optional[List[str] or Env or str]): List of requirements to install on the remote cluster, or path to the requirements.txt file, or Env object or string name of an Env object. dryrun (bool): Whether to create the Function if it doesn't exist, or load the Function object as a dryrun. @@ -61,6 +59,12 @@ def function( # Try reloading existing function return Function.from_name(name, dryrun) + if system: + raise Exception( + "`system` argument is no longer supported in function factory function. " + "Use `.to(system=system)` after construction to send the function to the system." + ) + if not isinstance(env, Env): env = _get_env_from(env) or Env(working_dir="./", name=Env.DEFAULT_NAME) @@ -105,11 +109,7 @@ def function( ) env.reqs = [repo_package] + env.reqs - system = _get_cluster_from(system) - - new_function = Function( - fn_pointers=fn_pointers, name=name, dryrun=dryrun, system=system, env=env - ) + new_function = Function(fn_pointers=fn_pointers, name=name, dryrun=dryrun, env=env) if load_secrets and not dryrun: new_function.send_secrets() diff --git a/runhouse/resources/module.py b/runhouse/resources/module.py index d1e525d34..0f272dcb4 100644 --- a/runhouse/resources/module.py +++ b/runhouse/resources/module.py @@ -1169,7 +1169,7 @@ def __call__( def module( cls: [Type] = None, name: Optional[str] = None, - system: Optional[Union[str, Cluster]] = None, + system: Optional[Union[str, Cluster]] = None, # deprecated env: Optional[Union[str, Env]] = None, dryrun: bool = False, ): @@ -1196,11 +1196,6 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int Args: cls: The class to instantiate. name (Optional[str]): Name to give the module object, to be reused later on. - system (Optional[str or Cluster]): File system or cluster name. If providing a file system this must be one of: - [``file``, ``github``, ``sftp``, ``ssh``, ``s3``, ``gs``, ``azure``]. - We are working to add additional file system support. If providing a cluster, this must be a cluster object - or name, and whether the data is saved to the object store or filesystem depends on whether a path is - specified. env (Optional[str or Env]): Environment in which the module should live on the cluster, if system is cluster. dryrun (bool): Whether to create the Blob if it doesn't exist, or load a Blob object as a dryrun. (Default: ``False``) @@ -1214,11 +1209,11 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int >>> >>> # Sample rh.Module class >>> class Model(rh.Module): - >>> def __init__(self, model_id, device="cpu", system=None, env=None): + >>> def __init__(self, model_id, device="cpu", env=None): >>> # Note that the code here will be run in your local environment prior to being sent to >>> # to a cluster. For loading large models/datasets that are only meant to be used remotely, >>> # we recommend using lazy initialization (see tokenizer and model attributes below). - >>> super().__init__(system=system, env=env) + >>> super().__init__(env=env) >>> self.model_id = model_id >>> self.device = device >>> @@ -1240,7 +1235,8 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int >>> return self.model(x) >>> # Creating rh.Module instance - >>> model = Model(model_id="bert-base-uncased", device="cuda", system="my_gpu", env="my_env") + >>> model = Model(model_id="bert-base-uncased", device="cuda", env="my_env") + >>> model = model.to(system="my_gpu") >>> model.predict("Hello world!") # Runs on system in env >>> tok = model.remote.tokenizer # Returns remote tokenizer >>> id = model.local.model_id # Returns local model_id, if any @@ -1252,8 +1248,8 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int >>> other_model = Model(model_id="bert-base-uncased", device="cuda").to("my_gpu", "my_env") >>> >>> # Another method: Create a module instance from an existing non-Module class using rh.module() - >>> RemoteModel = rh.module(cls=BERTModel, system="my_gpu", env="my_env") - >>> remote_model = RemoteModel(model_id="bert-base-uncased", device="cuda") + >>> RemoteModel = rh.module(cls=BERTModel, env="my_env") + >>> remote_model = RemoteModel(model_id="bert-base-uncased", device="cuda").to(system="my_gpu") >>> remote_model.predict("Hello world!") # Runs on system in env >>> >>> # You can also call remote class methods @@ -1267,7 +1263,11 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int # Try reloading existing module return Module.from_name(name, dryrun) - system = _get_cluster_from(system or _current_cluster(key="config"), dryrun=dryrun) + if system: + raise Exception( + "`system` argument is no longer supported in the module factory function. " + "Use `.to(system)` or `.get_or_to(system)` after construction to send and run the Module on the system." + ) if not isinstance(env, Env): env = _get_env_from(env) or Env(name=Env.DEFAULT_NAME) @@ -1280,7 +1280,6 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int module_subclass = _module_subclass_factory(cls, cls_pointers) return module_subclass._module_init_only( - system=system, env=env, dryrun=dryrun, pointers=cls_pointers,