Merge remote-tracking branch 'origin/main' into new-token-ci-support-…

…den-prod # Conflicts: # .github/workflows/local_tests_den_dev.yaml # runhouse/rns/rns_client.py # runhouse/servers/cluster_servlet.py # runhouse/servers/http/auth.py # runhouse/servers/obj_store.py
run-house · Sep 12, 2024 · a8d4a40 · a8d4a40
2 parents 7dc62e0 + 95baa6c
commit a8d4a40
Show file tree

Hide file tree

Showing 36 changed files with 678 additions and 308 deletions.
diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml
@@ -18,7 +18,7 @@ jobs:
       - name: Build docs
         run: cd docs && make html && cd ..
       - name: Upload artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: docs
           path: docs/_build/html
diff --git a/.github/workflows/local_tests_den_dev.yaml b/.github/workflows/local_tests_den_dev.yaml
@@ -41,14 +41,9 @@ jobs:
           token: ${{ secrets.DEN_TESTER_DEV_TOKEN }}
           api_server_url: ${{ env.API_SERVER_URL }}
 
-      - name: Set Environment Variables for Dev
-        run: |
-          echo "KITCHEN_TESTER_TOKEN=${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}" >> $GITHUB_ENV
-          echo "ORG_MEMBER_TOKEN=${{ secrets.ORG_MEMBER_DEV_TOKEN }}" >> $GITHUB_ENV
-
       - name: pytest -v --level local tests/test_servers/
         env:
-          KITCHEN_TESTER_TOKEN: ${{ env.KITCHEN_TESTER_TOKEN }}
+          KITCHEN_TESTER_TOKEN: ${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}
           KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
         run: pytest -v --level local tests/test_servers/ --api-server-url $API_SERVER_URL
         timeout-minutes: 60
@@ -84,16 +79,11 @@ jobs:
           token: ${{ secrets.DEN_TESTER_DEV_TOKEN }}
           api_server_url: ${{ env.API_SERVER_URL }}
 
-      - name: Set Environment Variables for Dev
-        run: |
-          echo "KITCHEN_TESTER_TOKEN=${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}" >> $GITHUB_ENV
-          echo "ORG_MEMBER_TOKEN=${{ secrets.ORG_MEMBER_DEV_TOKEN }}" >> $GITHUB_ENV
-
       - name: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest"
         env:
-          KITCHEN_TESTER_TOKEN: ${{ env.KITCHEN_TESTER_TOKEN }}
+          KITCHEN_TESTER_TOKEN: ${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}
           KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
-          ORG_MEMBER_TOKEN: ${{ env.ORG_MEMBER_TOKEN }}
+          ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_DEV_TOKEN }}
           ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
         run: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest" --api-server-url $API_SERVER_URL
         timeout-minutes: 60
@@ -129,16 +119,11 @@ jobs:
           token: ${{ secrets.DEN_TESTER_DEV_TOKEN }}
           api_server_url: ${{ env.API_SERVER_URL }}
 
-      - name: Set Environment Variables for Dev
-        run: |
-          echo "KITCHEN_TESTER_TOKEN=${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}" >> $GITHUB_ENV
-          echo "ORG_MEMBER_TOKEN=${{ secrets.ORG_MEMBER_DEV_TOKEN }}" >> $GITHUB_ENV
-
       - name: pytest -v --level local -k "secrettest"
         env:
-          KITCHEN_TESTER_TOKEN: ${{ env.KITCHEN_TESTER_TOKEN }}
+          KITCHEN_TESTER_TOKEN: ${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}
           KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
-          ORG_MEMBER_TOKEN: ${{ env.ORG_MEMBER_TOKEN }}
+          ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_DEV_TOKEN }}
           ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
         run: pytest -v --level local -k "secrettest" --api-server-url $API_SERVER_URL
         timeout-minutes: 60
@@ -159,16 +144,11 @@ jobs:
           token: ${{ secrets.DEN_TESTER_DEV_TOKEN }}
           api_server_url: ${{ env.API_SERVER_URL }}
 
-      - name: Set Environment Variables for Dev
-        run: |
-          echo "KITCHEN_TESTER_TOKEN=${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}" >> $GITHUB_ENV
-          echo "ORG_MEMBER_TOKEN=${{ secrets.ORG_MEMBER_DEV_TOKEN }}" >> $GITHUB_ENV
-
       - name: pytest -v --level local -k "moduletest"
         env:
-          KITCHEN_TESTER_TOKEN: ${{ env.KITCHEN_TESTER_TOKEN }}
+          KITCHEN_TESTER_TOKEN: ${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}
           KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
-          ORG_MEMBER_TOKEN: ${{ env.ORG_MEMBER_TOKEN }}
+          ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_DEV_TOKEN }}
           ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
         run: pytest -v --level local -k "moduletest" --api-server-url $API_SERVER_URL
         timeout-minutes: 60
@@ -189,16 +169,11 @@ jobs:
           token: ${{ secrets.DEN_TESTER_DEV_TOKEN }}
           api_server_url: ${{ env.API_SERVER_URL }}
 
-      - name: Set Environment Variables for Dev
-        run: |
-          echo "KITCHEN_TESTER_TOKEN=${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}" >> $GITHUB_ENV
-          echo "ORG_MEMBER_TOKEN=${{ secrets.ORG_MEMBER_DEV_TOKEN }}" >> $GITHUB_ENV
-
       - name: pytest -v --level local -k "functiontest"
         env:
-          KITCHEN_TESTER_TOKEN: ${{ env.KITCHEN_TESTER_TOKEN }}
+          KITCHEN_TESTER_TOKEN: ${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}
           KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
-          ORG_MEMBER_TOKEN: ${{ env.ORG_MEMBER_TOKEN }}
+          ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_DEV_TOKEN }}
           ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
         run: pytest -v --level local -k "functiontest" --api-server-url $API_SERVER_URL
         timeout-minutes: 60
@@ -219,16 +194,11 @@ jobs:
           token: ${{ secrets.DEN_TESTER_DEV_TOKEN }}
           api_server_url: ${{ env.API_SERVER_URL }}
 
-      - name: Set Environment Variables for Dev
-        run: |
-          echo "KITCHEN_TESTER_TOKEN=${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}" >> $GITHUB_ENV
-          echo "ORG_MEMBER_TOKEN=${{ secrets.ORG_MEMBER_DEV_TOKEN }}" >> $GITHUB_ENV
-
       - name: pytest -v --level local -k "envtest"
         env:
-          KITCHEN_TESTER_TOKEN: ${{ env.KITCHEN_TESTER_TOKEN }}
+          KITCHEN_TESTER_TOKEN: ${{ secrets.KITCHEN_TESTER_DEV_TOKEN }}
           KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
-          ORG_MEMBER_TOKEN: ${{ env.ORG_MEMBER_TOKEN }}
+          ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_DEV_TOKEN }}
           ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
         run: pytest -v --level local -k "envtest" --api-server-url $API_SERVER_URL
         timeout-minutes: 60
diff --git a/.github/workflows/nightly_release_testing.yaml b/.github/workflows/nightly_release_testing.yaml
@@ -111,7 +111,7 @@ jobs:
           KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
           ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_PROD_TOKEN }}
           ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
-        run: pytest --level release tests -k "ondemand_aws_cluster" --detached
+        run: pytest --level release tests -k "ondemand_aws_docker_cluster" --detached
         timeout-minutes: 180
 
       - name: Teardown all ondemand-aws-tests clusters

diff --git a/.github/workflows/sagemaker_tests.yaml b/.github/workflows/sagemaker_tests.yaml
diff --git a/examples/lora-example-with-notebook/readme.md b/examples/lora-example-with-notebook/readme.md
@@ -0,0 +1,7 @@
+## LoRA Fine-Tuning Class with Example of Notebook Usage
+In this example, we define a Fine Tuner class (LoraFineTuner.py) in **regular Python** and launch remote GPU compute to do the fine-tuning.
+
+In particular, we show how you can start the fine-tuning and interact with the fine-tuning class (a remote object) through regular Python or a notebook. Runhouse lets you work *locally* with *remote objects* that are defined by regular code and edited locally, in contrast to tooling like hosted notebooks, which let you *work locally while SSH'ed into a remote setting.* This offers a few distinct advantages:
+* **Real compute and real data:** ML Engineers and data scientists do not need to launch projects on toy compute offered in a research environment.
+* **Real code:** Rather than working in notebooks because they have to, your team writes code and develops locally, just like a normal software team. The only difference is that the work is dispatched for remote computation, since the local machine doesn't have the right hardware.
+* **Fast research to production:** The work done while writing and testing the class is essentially enough to bring the work to production as well. There is no costly rebuilding of the same code a second time to work in a Pipeline.
diff --git a/examples/torch-training/TorchBasicExample.py b/examples/torch-training/TorchBasicExample.py
@@ -33,18 +33,39 @@
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
+from PIL import Image
 
 from torch.utils.data import DataLoader
 from torchvision import datasets, transforms
 
 
 # Let's define a function that downloads the data. You can imagine this as a generic function to access data.
-def DownloadData(path="./data"):
+def download_data(path="./data"):
     datasets.MNIST(path, train=True, download=True)
     datasets.MNIST(path, train=False, download=True)
     print("Done with data download")
 
 
+def preprocess_data(path):
+    transform = transforms.Compose(
+        [
+            transforms.Resize(
+                (28, 28), interpolation=Image.BILINEAR
+            ),  # Resize to 28x28 using bilinear interpolation
+            transforms.ToTensor(),
+            transforms.Normalize(
+                (0.5,), (0.5,)
+            ),  # Normalize with mean=0.5, std=0.5 for general purposes
+        ]
+    )
+
+    train = datasets.MNIST(path, train=False, download=False, transform=transform)
+    test = datasets.MNIST(path, train=False, download=False, transform=transform)
+    print("Done with data preprocessing")
+    print(f"Number of training samples: {len(train)}")
+    print(f"Number of test samples: {len(test)}")
+
+
 # Next, we define a model class. We define a very basic feedforward neural network with three fully connected layers.
 class TorchExampleBasic(nn.Module):
     def __init__(self):
@@ -78,9 +99,7 @@ def __init__(self):
         self.train_loader = None
         self.test_loader = None
 
-        self.transform = transforms.Compose(
-            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
-        )
+        self.transform = transforms.Compose([transforms.ToTensor()])
 
         self.accuracy = None
         self.test_loss = None
@@ -216,7 +235,8 @@ def return_status(self):
     remote_torch_example = rh.module(SimpleTrainer).to(
         cluster, env=env, name="torch-basic-training"
     )
-    remote_download = rh.function(DownloadData).to(cluster, env=env)
+    remote_download = rh.function(download_data).to(cluster, env=env)
+    remote_preprocess = rh.function(preprocess_data).to(cluster, env=env)
 
     # ## Calling our remote Trainer
     # We instantiate the remote class
@@ -235,6 +255,7 @@ def return_status(self):
     # We create the datasets remotely, and then send them to the remote model / remote .load_train() method. The "preprocessing" happens remotely.
     # They become instance variables of the remote Trainer.
     remote_download()
+    remote_preprocess()
 
     model.load_train("./data", batch_size)
     model.load_test("./data", batch_size)

diff --git a/examples/torch-training/airflow-multicloud/DataProcessing.py b/examples/torch-training/airflow-multicloud/DataProcessing.py
@@ -0,0 +1,40 @@
+import os
+
+import boto3
+
+
+# Download data from S3
+def download_folder_from_s3(bucket_name, s3_folder_prefix, local_folder_path):
+    s3 = boto3.client("s3")
+
+    paginator = s3.get_paginator("list_objects_v2")
+    for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_folder_prefix):
+        if "Contents" in page:
+            for obj in page["Contents"]:
+                s3_key = obj["Key"]
+                relative_path = os.path.relpath(s3_key, s3_folder_prefix)
+                local_path = os.path.join(local_folder_path, relative_path)
+
+                os.makedirs(os.path.dirname(local_path), exist_ok=True)
+                s3.download_file(bucket_name, s3_key, local_path)
+                print(f"Downloaded {s3_key} to {local_path}")
+
+
+# Example: download_folder_from_s3('rh-demo-external', 'your/s3/folder/prefix', '/path/to/local/folder')
+
+
+# Upload data to S3 bucket
+def upload_folder_to_s3(local_folder_path, bucket_name, s3_folder_prefix):
+    s3 = boto3.client("s3")
+
+    for root, dirs, files in os.walk(local_folder_path):
+        for file in files:
+            local_path = os.path.join(root, file)
+            relative_path = os.path.relpath(local_path, local_folder_path)
+            s3_path = os.path.join(s3_folder_prefix, relative_path)
+
+            s3.upload_file(local_path, bucket_name, s3_path)
+            print(f"Uploaded {local_path} to s3://{bucket_name}/{s3_path}")
+
+
+# Example: upload_folder_to_s3('/path/to/local/folder', 'rh-demo-external', 'your/s3/folder/prefix')