Commit
Merge pull request #161 from henk717/united
Release 1.19
henk717 committed Oct 4, 2022
2 parents 2f45b93 + 7bd3125 commit cf3aebb
Showing 57 changed files with 22,227 additions and 1,496 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -1,2 +1,3 @@
*.min.lua linguist-vendored
*documentation.html linguist-vendored
/static/swagger-ui/* linguist-vendored
3 changes: 3 additions & 0 deletions .gitignore
@@ -15,6 +15,7 @@ bin
__pycache__
*.log
cache
accelerate-disk-cache
userscripts
!userscripts/examples
!userscripts/kaipreset_*.lua
@@ -24,6 +25,8 @@ softprompts
models
!models/models go here.txt
Uninstall
flask_session
accelerate-disk-cache
.ipynb_checkpoints

# Ignore PyCharm project files.
7,139 changes: 5,786 additions & 1,353 deletions aiserver.py

Large diffs are not rendered by default.

101 changes: 98 additions & 3 deletions breakmodel.py
@@ -4,7 +4,7 @@
The ORIGINAL version of the patch is released under the Apache License 2.0
Copyright 2021 arrmansa
Copyright 2021 finetuneanon
Copyright 2018 The Hugging Face team
Copyright 2018, 2022 The Hugging Face team
Apache License
@@ -216,11 +216,13 @@
import torch.cuda.comm
import copy
import gc
import os
import sys
import itertools
import bisect
import random
from typing import Optional
import utils
from typing import Dict, List, Optional, Union

from transformers.modeling_outputs import BaseModelOutputWithPast, BaseModelOutputWithPastAndCrossAttentions

@@ -230,7 +232,100 @@

breakmodel = True
gpu_blocks = []
primary_device = 0
disk_blocks = 0
primary_device = 0 if torch.cuda.device_count() > 0 else "cpu"


if utils.HAS_ACCELERATE:
from accelerate.hooks import attach_align_device_hook_on_blocks
from accelerate.utils import OffloadedWeightsLoader, check_device_map, extract_submodules_state_dict, offload_state_dict
from accelerate import dispatch_model

def dispatch_model_ex(
model: nn.Module,
device_map: Dict[str, Union[str, int, torch.device]],
main_device: Optional[torch.device] = None,
state_dict: Optional[Dict[str, torch.Tensor]] = None,
offload_dir: Union[str, os.PathLike] = None,
offload_buffers: bool = False,
**kwargs,
):
"""
This is a modified version of
https://github.com/huggingface/accelerate/blob/eeaba598f455fbd2c48661d7e816d3ff25ab050b/src/accelerate/big_modeling.py#L130
that still works when the main device is the CPU.
Dispatches a model according to a given device map. Layers of the model might be spread across GPUs, offloaded on
the CPU or even the disk.
Args:
model (`torch.nn.Module`):
The model to dispatch.
device_map (`Dict[str, Union[str, int, torch.device]]`):
A dictionary mapping module names in the model's `state_dict` to the device they should go to. Note that
`"disk"` is accepted even if it's not a proper value for `torch.device`.
main_device (`str`, `int` or `torch.device`, *optional*):
The main execution device. Will default to the first device in the `device_map` different from `"cpu"` or
`"disk"`.
state_dict (`Dict[str, torch.Tensor]`, *optional*):
The state dict of the part of the model that will be kept on CPU.
offload_dir (`str` or `os.PathLike`):
The folder in which to offload the model weights (or where the model weights are already offloaded).
offload_buffers (`bool`, *optional*, defaults to `False`):
Whether or not to offload the buffers with the model parameters.
preload_module_classes (`List[str]`, *optional*):
A list of classes whose instances should load all their weights (even in the submodules) at the beginning
of the forward. This should only be used for classes that have submodules which are registered but not
called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
`dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
"""
if main_device != "cpu":
return dispatch_model(model, device_map, main_device, state_dict, offload_dir=offload_dir, offload_buffers=offload_buffers, **kwargs)

# Error early if the device map is incomplete.
check_device_map(model, device_map)

offload_devices = ["cpu", "disk"] if main_device != "cpu" else ["disk"]

if main_device is None:
main_device = [d for d in device_map.values() if d not in offload_devices][0]

cpu_modules = [name for name, device in device_map.items() if device == "cpu"] if main_device != "cpu" else []
if state_dict is None and len(cpu_modules) > 0:
state_dict = extract_submodules_state_dict(model.state_dict(), cpu_modules)

disk_modules = [name for name, device in device_map.items() if device == "disk"]
if offload_dir is None and len(disk_modules) > 0:
raise ValueError(
"We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules "
f"need to be offloaded: {', '.join(disk_modules)}."
)
if len(disk_modules) > 0 and (
not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json"))
):
disk_state_dict = extract_submodules_state_dict(model.state_dict(), disk_modules)
offload_state_dict(offload_dir, disk_state_dict)

execution_device = {
name: main_device if device in offload_devices else device for name, device in device_map.items()
}
offload = {name: device in offload_devices for name, device in device_map.items()}
save_folder = offload_dir if len(disk_modules) > 0 else None
if state_dict is not None or save_folder is not None:
weights_map = OffloadedWeightsLoader(state_dict=state_dict, save_folder=save_folder)
else:
weights_map = None

attach_align_device_hook_on_blocks(
model,
execution_device=execution_device,
offload=offload,
offload_buffers=offload_buffers,
weights_map=weights_map,
**kwargs,
)
model.hf_device_map = device_map
return model


# Copied from transformers.models.bart.modeling_bart._expand_mask
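For orientation, here is a minimal usage sketch of the dispatch_model_ex helper added above (not part of the commit). The toy module and its device_map keys are purely illustrative; it assumes accelerate is installed so that breakmodel defines the function, and the offload folder name simply mirrors the accelerate-disk-cache entry added to .gitignore.

import torch
import torch.nn as nn
from breakmodel import dispatch_model_ex  # only defined when utils.HAS_ACCELERATE is True

class TinyModel(nn.Module):
    """A stand-in for a real language model, small enough to run anywhere."""
    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(100, 16)
        self.block = nn.Linear(16, 16)
        self.head = nn.Linear(16, 100)

    def forward(self, ids):
        return self.head(self.block(self.embed(ids)))

model = TinyModel()

# Map each top-level submodule to a device; modules mapped to "disk" are written
# to offload_dir and streamed back in by accelerate's hooks at forward time.
device_map = {"embed": "cpu", "block": "disk", "head": "cpu"}

model = dispatch_model_ex(
    model,
    device_map,
    main_device="cpu",                    # the case the stock dispatch_model does not support
    offload_dir="accelerate-disk-cache",  # folder used for the "disk" modules
)

print(model(torch.tensor([[1, 2, 3]])).shape)  # torch.Size([1, 3, 100])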
1 change: 1 addition & 0 deletions customsettings_template.json
@@ -0,0 +1 @@
{"aria2_port":null, "breakmodel":null, "breakmodel_disklayers":null, "breakmodel_gpulayers":null, "breakmodel_layers":null, "colab":null, "configname":null, "cpu":null, "host":null, "localtunnel":null, "lowmem":null, "model":null, "ngrok":null, "no_aria2":null, "noaimenu":null, "nobreakmodel":null, "override_delete":null, "override_rename":null, "path":null, "port":null, "quiet":null, "remote":null, "revision":null, "savemodel":null, "unblock":null}
8 changes: 8 additions & 0 deletions docker-standalone/Dockerfile
@@ -0,0 +1,8 @@
FROM debian
RUN apt update && apt install wget aria2 git bzip2 -y
RUN git clone https://github.com/henk717/koboldai /opt/koboldai
WORKDIR /opt/koboldai
RUN ./install_requirements.sh cuda
COPY docker-helper.sh /opt/koboldai/docker-helper.sh
EXPOSE 5000/tcp
CMD /opt/koboldai/docker-helper.sh
17 changes: 17 additions & 0 deletions docker-standalone/Readme.txt
@@ -0,0 +1,17 @@
These are the source files for the official versions of the standalone docker and are provided for completeness.
Builds made from these files do not use any local modifications you have made; instead, the latest GitHub version of KoboldAI is used as the basis.

If you wish to run KoboldAI containerised with access to the local directory you can do so using docker-cuda.sh or docker-rocm.sh instead.

We do not support ROCm in the standalone docker as it is intended for cloud deployment on CUDA systems.
If you wish to build a ROCm version instead, you can do so by modifying the Dockerfile and changing the install_requirements.sh from cuda to rocm.

Similarly you need to modify the Dockerfile to specify which branch of KoboldAI the docker is being built for.

Usage:
This docker will automatically assume the persistent volume is mounted to /content and will by default not store models there.
The following environment variables exist to adjust the behavior if desired.

KOBOLDAI_DATADIR=/content , this can be used to specify a different default location for your stories, settings, userscripts, etc. in case your provider does not let you change the mounted folder path.
KOBOLDAI_MODELDIR= , this variable can be used to make model storage persistent; it can be the same location as your datadir, but this is not required.
KOBOLDAI_ARGS= , this variable is built into KoboldAI and can be used to override the default launch options. By default the docker launches in remote mode, with output hidden from the logs and file management enabled.
47 changes: 47 additions & 0 deletions docker-standalone/docker-helper.sh
@@ -0,0 +1,47 @@
#!/bin/bash
cd /opt/koboldai
git pull
#./install_requirements.sh cuda

if [[ ! -v KOBOLDAI_DATADIR ]];then
mkdir /content
KOBOLDAI_DATADIR=/content
fi

mkdir $KOBOLDAI_DATADIR/stories
if [[ ! -v KOBOLDAI_MODELDIR ]];then
mkdir $KOBOLDAI_MODELDIR/models
fi
mkdir $KOBOLDAI_DATADIR/settings
mkdir $KOBOLDAI_DATADIR/softprompts
mkdir $KOBOLDAI_DATADIR/userscripts
#mkdir $KOBOLDAI_MODELDIR/cache

cp -rn stories/* $KOBOLDAI_DATADIR/stories/
cp -rn userscripts/* $KOBOLDAI_DATADIR/userscripts/
cp -rn softprompts/* $KOBOLDAI_DATADIR/softprompts/

rm stories
rm -rf stories/
rm userscripts
rm -rf userscripts/
rm softprompts
rm -rf softprompts/

if [[ ! -v KOBOLDAI_MODELDIR ]];then
rm models
rm -rf models/
#rm cache
#rm -rf cache/
fi

ln -s $KOBOLDAI_DATADIR/stories/ stories
ln -s $KOBOLDAI_DATADIR/settings/ settings
ln -s $KOBOLDAI_DATADIR/softprompts/ softprompts
ln -s $KOBOLDAI_DATADIR/userscripts/ userscripts
if [[ ! -v KOBOLDAI_MODELDIR ]];then
ln -s $KOBOLDAI_MODELDIR/models/ models
#ln -s $KOBOLDAI_MODELDIR/cache/ cache
fi

PYTHONUNBUFFERED=1 ./play.sh --remote --quiet --override_delete --override_rename
4 changes: 4 additions & 0 deletions environments/finetuneanon.yml
@@ -6,6 +6,7 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- pytorch
- cudatoolkit=11.1
- tensorflow-gpu
@@ -15,6 +16,9 @@ dependencies:
- bleach=4.1.0
- pip
- git=2.35.1
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
- flask-cloudflared
6 changes: 5 additions & 1 deletion environments/huggingface.yml
@@ -6,6 +6,7 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- pytorch=1.11.*
- python=3.8.*
- cudatoolkit=11.1
@@ -16,9 +17,12 @@ dependencies:
- git=2.35.1
- sentencepiece
- protobuf
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- flask-cloudflared
- flask-ngrok
- lupa==1.10
- transformers>=4.20.1
- accelerate
- accelerate
4 changes: 4 additions & 0 deletions environments/rocm-finetune.yml
@@ -5,12 +5,16 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- python=3.8.*
- eventlet
- markdown
- bleach=4.1.0
- pip
- git=2.35.1
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
- torch
8 changes: 6 additions & 2 deletions environments/rocm.yml
@@ -5,6 +5,7 @@ channels:
dependencies:
- colorama
- flask-socketio
- flask-session
- python=3.8.*
- eventlet
- markdown
@@ -13,9 +14,12 @@ dependencies:
- git=2.35.1
- sentencepiece
- protobuf
- marshmallow>=3.13
- apispec-webframeworks
- loguru
- pip:
- --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
- torch==1.10.*
- --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
- torch
- torchvision
- flask-cloudflared
- flask-ngrok
9 changes: 5 additions & 4 deletions fileops.py
@@ -3,6 +3,7 @@
import os
import json
import zipfile
from logger import logger

#==================================================================#
# Generic Method for prompting for file path
@@ -149,16 +150,16 @@ def getspfiles(model_dimension: int):
continue
z, version, shape, fortran_order, dtype = checksp(file, model_dimension)
if z == 1:
print(f"Browser SP loading error: {file} is malformed or not a soft prompt ZIP file.")
logger.warning(f"Softprompt {file} is malformed or not a soft prompt ZIP file.")
continue
if z == 2:
print(f"Browser SP loading error: {file} tensor.npy has unsupported dtype '{dtype.name}'.")
logger.warning(f"Softprompt {file} tensor.npy has unsupported dtype '{dtype.name}'.")
continue
if z == 3:
print(f"Browser SP loading error: {file} tensor.npy has model dimension {shape[1]} which does not match your model's model dimension of {model_dimension}. This usually means this soft prompt is not compatible with your model.")
logger.debug(f"Softprompt {file} tensor.npy has model dimension {shape[1]} which does not match your model's model dimension of {model_dimension}. This usually means this soft prompt is not compatible with your model.")
continue
if z == 4:
print(f"Browser SP loading error: {file} tensor.npy has {shape[0]} tokens but it is supposed to have less than 2048 tokens.")
logger.warning(f"Softprompt {file} tensor.npy has {shape[0]} tokens but it is supposed to have less than 2048 tokens.")
continue
assert isinstance(z, zipfile.ZipFile)
try:
