Skip to content

Commit

Permalink
Deprecate system arg from function/module factory functions (#625)
Browse files Browse the repository at this point in the history
  • Loading branch information
carolineechen committed Mar 20, 2024
1 parent a8889e5 commit bfbe901
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 23 deletions.
2 changes: 1 addition & 1 deletion runhouse/resources/functionals/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def mapper(
>>> remote_fn = rh.function(local_fn).to(cluster)
>>> mapper = rh.mapper(remote_fn, replicas=2)
>>> remote_module = rh.module(cls=MyClass, system=cluster, env="my_env")
>>> remote_module = rh.module(cls=MyClass).to(system=cluster, env="my_env")
>>> mapper = rh.mapper(remote_module, method=my_class_method, replicas=-1)
"""

Expand Down
18 changes: 9 additions & 9 deletions runhouse/resources/functions/function_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from runhouse.resources.envs import _get_env_from, Env
from runhouse.resources.functions.function import Function
from runhouse.resources.hardware import _get_cluster_from, Cluster
from runhouse.resources.hardware import Cluster
from runhouse.resources.packages import git_package

logger = logging.getLogger(__name__)
Expand All @@ -14,7 +14,7 @@
def function(
fn: Optional[Union[str, Callable]] = None,
name: Optional[str] = None,
system: Optional[Union[str, Cluster]] = None,
system: Optional[Union[str, Cluster]] = None, # deprecated
env: Optional[Union[List[str], Env, str]] = None,
dryrun: bool = False,
load_secrets: bool = False,
Expand All @@ -28,8 +28,6 @@ def function(
fn (Optional[str or Callable]): The function to execute on the remote system when the function is called.
name (Optional[str]): Name of the Function to create or retrieve.
This can be either from a local config or from the RNS.
system (Optional[str or Cluster]): Hardware (cluster) on which to execute the Function.
This can be either the string name of a Cluster object, or a Cluster object.
env (Optional[List[str] or Env or str]): List of requirements to install on the remote cluster, or path to the
requirements.txt file, or Env object or string name of an Env object.
dryrun (bool): Whether to create the Function if it doesn't exist, or load the Function object as a dryrun.
Expand Down Expand Up @@ -61,6 +59,12 @@ def function(
# Try reloading existing function
return Function.from_name(name, dryrun)

if system:
raise Exception(
"`system` argument is no longer supported in function factory function. "
"Use `.to(system=system)` after construction to send the function to the system."
)

if not isinstance(env, Env):
env = _get_env_from(env) or Env(working_dir="./", name=Env.DEFAULT_NAME)

Expand Down Expand Up @@ -105,11 +109,7 @@ def function(
)
env.reqs = [repo_package] + env.reqs

system = _get_cluster_from(system)

new_function = Function(
fn_pointers=fn_pointers, name=name, dryrun=dryrun, system=system, env=env
)
new_function = Function(fn_pointers=fn_pointers, name=name, dryrun=dryrun, env=env)

if load_secrets and not dryrun:
new_function.send_secrets()
Expand Down
25 changes: 12 additions & 13 deletions runhouse/resources/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,7 @@ def __call__(
def module(
cls: [Type] = None,
name: Optional[str] = None,
system: Optional[Union[str, Cluster]] = None,
system: Optional[Union[str, Cluster]] = None, # deprecated
env: Optional[Union[str, Env]] = None,
dryrun: bool = False,
):
Expand All @@ -1196,11 +1196,6 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int
Args:
cls: The class to instantiate.
name (Optional[str]): Name to give the module object, to be reused later on.
system (Optional[str or Cluster]): File system or cluster name. If providing a file system this must be one of:
[``file``, ``github``, ``sftp``, ``ssh``, ``s3``, ``gs``, ``azure``].
We are working to add additional file system support. If providing a cluster, this must be a cluster object
or name, and whether the data is saved to the object store or filesystem depends on whether a path is
specified.
env (Optional[str or Env]): Environment in which the module should live on the cluster, if system is cluster.
dryrun (bool): Whether to create the Blob if it doesn't exist, or load a Blob object as a dryrun.
(Default: ``False``)
Expand All @@ -1214,11 +1209,11 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int
>>>
>>> # Sample rh.Module class
>>> class Model(rh.Module):
>>> def __init__(self, model_id, device="cpu", system=None, env=None):
>>> def __init__(self, model_id, device="cpu", env=None):
>>> # Note that the code here will be run in your local environment prior to being sent to
>>> # to a cluster. For loading large models/datasets that are only meant to be used remotely,
>>> # we recommend using lazy initialization (see tokenizer and model attributes below).
>>> super().__init__(system=system, env=env)
>>> super().__init__(env=env)
>>> self.model_id = model_id
>>> self.device = device
>>>
Expand All @@ -1240,7 +1235,8 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int
>>> return self.model(x)
>>> # Creating rh.Module instance
>>> model = Model(model_id="bert-base-uncased", device="cuda", system="my_gpu", env="my_env")
>>> model = Model(model_id="bert-base-uncased", device="cuda", env="my_env")
>>> model = model.to(system="my_gpu")
>>> model.predict("Hello world!") # Runs on system in env
>>> tok = model.remote.tokenizer # Returns remote tokenizer
>>> id = model.local.model_id # Returns local model_id, if any
Expand All @@ -1252,8 +1248,8 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int
>>> other_model = Model(model_id="bert-base-uncased", device="cuda").to("my_gpu", "my_env")
>>>
>>> # Another method: Create a module instance from an existing non-Module class using rh.module()
>>> RemoteModel = rh.module(cls=BERTModel, system="my_gpu", env="my_env")
>>> remote_model = RemoteModel(model_id="bert-base-uncased", device="cuda")
>>> RemoteModel = rh.module(cls=BERTModel, env="my_env")
>>> remote_model = RemoteModel(model_id="bert-base-uncased", device="cuda").to(system="my_gpu")
>>> remote_model.predict("Hello world!") # Runs on system in env
>>>
>>> # You can also call remote class methods
Expand All @@ -1267,7 +1263,11 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int
# Try reloading existing module
return Module.from_name(name, dryrun)

system = _get_cluster_from(system or _current_cluster(key="config"), dryrun=dryrun)
if system:
raise Exception(
"`system` argument is no longer supported in the module factory function. "
"Use `.to(system)` or `.get_or_to(system)` after construction to send and run the Module on the system."
)

if not isinstance(env, Env):
env = _get_env_from(env) or Env(name=Env.DEFAULT_NAME)
Expand All @@ -1280,7 +1280,6 @@ class (e.g. ``to``, ``fetch``, etc.). Properties and private methods are not int

module_subclass = _module_subclass_factory(cls, cls_pointers)
return module_subclass._module_init_only(
system=system,
env=env,
dryrun=dryrun,
pointers=cls_pointers,
Expand Down

0 comments on commit bfbe901

Please sign in to comment.