Commit
Merge pull request #161 from henk717/united
Release 1.19
henk717 committed Oct 4, 2022
2 parents 2f45b93 + 7bd3125 commit cf3aebb
Showing 57 changed files with 22,227 additions and 1,496 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -1,2 +1,3 @@
*.min.lua linguist-vendored
*documentation.html linguist-vendored
/static/swagger-ui/* linguist-vendored
3 changes: 3 additions & 0 deletions .gitignore
@@ -15,6 +15,7 @@ bin
__pycache__
*.log
cache
accelerate-disk-cache
userscripts
!userscripts/examples
!userscripts/kaipreset_*.lua
@@ -24,6 +25,8 @@ softprompts
models
!models/models go here.txt
Uninstall
flask_session
accelerate-disk-cache
.ipynb_checkpoints

# Ignore PyCharm project files.
7,139 changes: 5,786 additions & 1,353 deletions aiserver.py

Large diffs are not rendered by default.

101 changes: 98 additions & 3 deletions breakmodel.py
@@ -4,7 +4,7 @@
The ORIGINAL version of the patch is released under the Apache License 2.0
Copyright 2021 arrmansa
Copyright 2021 finetuneanon
Copyright 2018 The Hugging Face team
Copyright 2018, 2022 The Hugging Face team
Apache License
@@ -216,11 +216,13 @@
import torch.cuda.comm
import copy
import gc
import os
import sys
import itertools
import bisect
import random
from typing import Optional
import utils
from typing import Dict, List, Optional, Union

from transformers.modeling_outputs import BaseModelOutputWithPast, BaseModelOutputWithPastAndCrossAttentions

@@ -230,7 +232,100 @@

breakmodel = True
gpu_blocks = []
primary_device = 0
disk_blocks = 0
primary_device = 0 if torch.cuda.device_count() > 0 else "cpu"


if utils.HAS_ACCELERATE:
from accelerate.hooks import attach_align_device_hook_on_blocks
from accelerate.utils import OffloadedWeightsLoader, check_device_map, extract_submodules_state_dict, offload_state_dict
from accelerate import dispatch_model

def dispatch_model_ex(
model: nn.Module,
device_map: Dict[str, Union[str, int, torch.device]],
main_device: Optional[torch.device] = None,
state_dict: Optional[Dict[str, torch.Tensor]] = None,
offload_dir: Union[str, os.PathLike] = None,
offload_buffers: bool = False,
**kwargs,
):
"""
This is a modified version of
https://github.com/huggingface/accelerate/blob/eeaba598f455fbd2c48661d7e816d3ff25ab050b/src/accelerate/big_modeling.py#L130
that still works when the main device is the CPU.
Dispatches a model according to a given device map. Layers of the model might be spread across GPUs, offloaded on
the CPU or even the disk.
Args:
model (`torch.nn.Module`):
The model to dispatch.
device_map (`Dict[str, Union[str, int, torch.device]]`):
A dictionary mapping module names in the model's `state_dict` to the device they should go to. Note that
`"disk"` is accepted even if it's not a proper value for `torch.device`.
main_device (`str`, `int` or `torch.device`, *optional*):
The main execution device. Will default to the first device in the `device_map` different from `"cpu"` or
`"disk"`.
state_dict (`Dict[str, torch.Tensor]`, *optional*):
The state dict of the part of the model that will be kept on CPU.
offload_dir (`str` or `os.PathLike`):
The folder in which to offload the model weights (or where the model weights are already offloaded).
offload_buffers (`bool`, *optional*, defaults to `False`):
Whether or not to offload the buffers with the model parameters.
preload_module_classes (`List[str]`, *optional*):
A list of classes whose instances should load all their weights (even in the submodules) at the beginning
of the forward. This should only be used for classes that have submodules which are registered but not
called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
`dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
"""
if main_device != "cpu":
return dispatch_model(model, device_map, main_device, state_dict, offload_dir=offload_dir, offload_buffers=offload_buffers, **kwargs)

# Error early if the device map is incomplete.
check_device_map(model, device_map)

offload_devices = ["cpu", "disk"] if main_device != "cpu" else ["disk"]

if main_device is None:
main_device = [d for d in device_map.values() if d not in offload_devices][0]

cpu_modules = [name for name, device in device_map.items() if device == "cpu"] if main_device != "cpu" else []
if state_dict is None and len(cpu_modules) > 0:
state_dict = extract_submodules_state_dict(model.state_dict(), cpu_modules)

disk_modules = [name for name, device in device_map.items() if device == "disk"]
if offload_dir is None and len(disk_modules) > 0:
raise ValueError(
"We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules "
f"need to be offloaded: {', '.join(disk_modules)}."
)
if len(disk_modules) > 0 and (
not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json"))
):
disk_state_dict = extract_submodules_state_dict(model.state_dict(), disk_modules)
offload_state_dict(offload_dir, disk_state_dict)

execution_device = {
name: main_device if device in offload_devices else device for name, device in device_map.items()
}
offload = {name: device in offload_devices for name, device in device_map.items()}
save_folder = offload_dir if len(disk_modules) > 0 else None
if state_dict is not None or save_folder is not None:
weights_map = OffloadedWeightsLoader(state_dict=state_dict, save_folder=save_folder)
else:
weights_map = None

attach_align_device_hook_on_blocks(
model,
execution_device=execution_device,
offload=offload,
offload_buffers=offload_buffers,
weights_map=weights_map,
**kwargs,
)
model.hf_device_map = device_map
return model


# Copied from transformers.models.bart.modeling_bart._expand_mask
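For orientation, here is a minimal usage sketch of the dispatch_model_ex helper added above (not part of the commit). The toy module and its device_map keys are purely illustrative; it assumes accelerate is installed so that breakmodel defines the function, and the offload folder name simply mirrors the accelerate-disk-cache entry added to .gitignore.

import torch
import torch.nn as nn
from breakmodel import dispatch_model_ex  # only defined when utils.HAS_ACCELERATE is True

class TinyModel(nn.Module):
    """A stand-in for a real language model, small enough to run anywhere."""
    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(100, 16)
        self.block = nn.Linear(16, 16)
        self.head = nn.Linear(16, 100)

    def forward(self, ids):
        return self.head(self.block(self.embed(ids)))

model = TinyModel()

# Map each top-level submodule to a device; modules mapped to "disk" are written
# to offload_dir and streamed back in by accelerate's hooks at forward time.
device_map = {"embed": "cpu", "block": "disk", "head": "cpu"}

model = dispatch_model_ex(
    model,
    device_map,
    main_device="cpu",                    # the case the stock dispatch_model does not support
    offload_dir="accelerate-disk-cache",  # folder used for the "disk" modules
)

print(model(torch.tensor([[1, 2, 3]])).shape)  # torch.Size([1, 3, 100])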
1 change: 1 addition & 0 deletions customsettings_template.json
@@ -0,0 +1 @@
{"aria2_port":null, "breakmodel":null, "breakmodel_disklayers":null, "breakmodel_gpulayers":null, "breakmodel_layers":null, "colab":null, "configname":null, "cpu":null, "host":null, "localtunnel":null, "lowmem":null, "model":null, "ngrok":null, "no_aria2":null, "noaimenu":null, "nobreakmodel":null, "override_delete":null, "override_rename":null, "path":null, "port":null, "quiet":null, "remote":null, "revision":null, "savemodel":null, "unblock":null}
8 changes: 8 additions & 0 deletions docker-standalone/Dockerfile
@@ -0,0 +1,8 @@
FROM debian
RUN apt update && apt install wget aria2 git bzip2 -y
RUN git clone https://github.com/henk717/koboldai /opt/koboldai
WORKDIR /opt/koboldai
RUN ./install_requirements.sh cuda
COPY docker-helper.sh /opt/koboldai/docker-helper.sh
EXPOSE 5000/tcp
CMD /opt/koboldai/docker-helper.sh
17 changes: 17 additions & 0 deletions docker-standalone/Readme.txt
@@ -0,0 +1,17 @@
These are the source files for the official versions of the standalone docker and are provided for completeness.
Builds made from these files do not use any local modifications you have made; instead, the latest GitHub version of KoboldAI is used as the basis.

If you wish to run KoboldAI containerised with access to the local directory you can do so using docker-cuda.sh or docker-rocm.sh instead.

We do not support ROCm in the standalone docker as it is intended for cloud deployment on CUDA systems.
If you wish to build a ROCm version instead, you can do so by modifying the Dockerfile and changing the install_requirements.sh from cuda to rocm.

Similarly you need to modify the Dockerfile to specify which branch of KoboldAI the docker is being built for.

Usage:
This docker will automatically assume the persistent volume is mounted to /content and will by default not store models there.
The following environment variables exist to adjust the behavior if desired.

KOBOLDAI_DATADIR=/content , this can be used to specify a different default location for your stories, settings, userscripts, etc. in case your provider does not let you change the mounted folder path.
KOBOLDAI_MODELDIR= , this variable can be used to make model storage persistent; it can be the same location as your datadir, but this is not required.
KOBOLDAI_ARGS= , this variable is built into KoboldAI and can be used to override the default launch options. By default the docker launches in remote mode, with output hidden from the logs and file management enabled.
47 changes: 47 additions & 0 deletions docker-standalone/docker-helper.sh
@@ -0,0 +1,47 @@
#!/bin/bash
cd /opt/koboldai
git pull
#./install_requirements.sh cuda

if [[ ! -v KOBOLDAI_DATADIR ]];then
mkdir /content
KOBOLDAI_DATADIR=/content
fi

mkdir $KOBOLDAI_DATADIR/stories
if [[ ! -v KOBOLDAI_MODELDIR ]];then
mkdir $KOBOLDAI_MODELDIR/models
fi
mkdir $KOBOLDAI_DATADIR/settings
mkdir $KOBOLDAI_DATADIR/softprompts
mkdir $KOBOLDAI_DATADIR/userscripts
#mkdir $KOBOLDAI_MODELDIR/cache

cp -rn stories/* $KOBOLDAI_DATADIR/stories/
cp -rn userscripts/* $KOBOLDAI_DATADIR/userscripts/
cp -rn softprompts/* $KOBOLDAI_DATADIR/softprompts/

rm stories
rm -rf stories/
rm userscripts
rm -rf userscripts/
rm softprompts
rm -rf softprompts/

if [[ ! -v KOBOLDAI_MODELDIR ]];then
rm models
rm -rf models/
#rm cache
#rm -rf cache/
fi

ln -s $KOBOLDAI_DATADIR/stories/ stories
ln -s $KOBOLDAI_DATADIR/settings/ settings
ln -s $KOBOLDAI_DATADIR/softprompts/ softprompts
ln -s $KOBOLDAI_DATADIR/userscripts/ userscripts
if [[ ! -v KOBOLDAI_MODELDIR ]];then
ln -s $KOBOLDAI_MODELDIR/models/ models
#ln -s $KOBOLDAI_MODELDIR/cache/ cache
fi

PYTHONUNBUFFERED=1 ./play.sh --remote --quiet --override_delete --override_rename
4 changes: 4 additions & 0 deletions environments/finetuneanon.yml
@@ -6,6 +6,7 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- pytorch
- cudatoolkit=11.1
- tensorflow-gpu
@@ -15,6 +16,9 @@ dependencies:
- bleach=4.1.0
- pip
- git=2.35.1
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
- flask-cloudflared
6 changes: 5 additions & 1 deletion environments/huggingface.yml
@@ -6,6 +6,7 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- pytorch=1.11.*
- python=3.8.*
- cudatoolkit=11.1
@@ -16,9 +17,12 @@ dependencies:
- git=2.35.1
- sentencepiece
- protobuf
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- flask-cloudflared
- flask-ngrok
- lupa==1.10
- transformers>=4.20.1
- accelerate
- accelerate
4 changes: 4 additions & 0 deletions environments/rocm-finetune.yml
@@ -5,12 +5,16 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- python=3.8.*
- eventlet
- markdown
- bleach=4.1.0
- pip
- git=2.35.1
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
- torch
8 changes: 6 additions & 2 deletions environments/rocm.yml
@@ -5,6 +5,7 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- python=3.8.*
- eventlet
- markdown
@@ -13,9 +14,12 @@ dependencies:
- git=2.35.1
- sentencepiece
- protobuf
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
- torch==1.10.*
- --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
- torch
- torchvision
- flask-cloudflared
- flask-ngrok
9 changes: 5 additions & 4 deletions fileops.py
@@ -3,6 +3,7 @@
import os
import json
import zipfile
from logger import logger

#==================================================================#
# Generic Method for prompting for file path
@@ -149,16 +150,16 @@ def getspfiles(model_dimension: int):
continue
z, version, shape, fortran_order, dtype = checksp(file, model_dimension)
if z == 1:
print(f"Browser SP loading error: {file} is malformed or not a soft prompt ZIP file.")
logger.warning(f"Softprompt {file} is malformed or not a soft prompt ZIP file.")
continue
if z == 2:
print(f"Browser SP loading error: {file} tensor.npy has unsupported dtype '{dtype.name}'.")
logger.warning(f"Softprompt {file} tensor.npy has unsupported dtype '{dtype.name}'.")
continue
if z == 3:
print(f"Browser SP loading error: {file} tensor.npy has model dimension {shape[1]} which does not match your model's model dimension of {model_dimension}. This usually means this soft prompt is not compatible with your model.")
logger.debug(f"Softprompt {file} tensor.npy has model dimension {shape[1]} which does not match your model's model dimension of {model_dimension}. This usually means this soft prompt is not compatible with your model.")
continue
if z == 4:
print(f"Browser SP loading error: {file} tensor.npy has {shape[0]} tokens but it is supposed to have less than 2048 tokens.")
logger.warning(f"Softprompt {file} tensor.npy has {shape[0]} tokens but it is supposed to have less than 2048 tokens.")
continue
assert isinstance(z, zipfile.ZipFile)
try:
