[LLM]update ci test script #2285

Merged: 11 commits, Nov 15, 2023
Changes from all commits
94 changes: 18 additions & 76 deletions llm/test/ci.py
@@ -1,4 +1,4 @@
-# Provided on the command line: 1. the PaddleNLP/llm path 2. the Fastdeploy/llm path 3. the data directory holding the PaddleNLP results and FD_DY results
+# Provided on the command line: 1. the PaddleNLP/llm path 2. the {Fastdeploy}/llm path 3. the data directory holding the PaddleNLP results and FD_DY results
#The stored data files include: NLP-llama-7b-fp16-bs1, NLP-llama-7b-fp16-bs4, NLP-llama-7b-ptuning-fp16-bs1, NLP-llama-7b-ptuning-fp16-bs4, NLP-llama-7b-ptuning-fp16-bs1-noprecache, NLP-llama-7b-ptuning-fp16-bs4-noprecache,
#NLP-belle-7b-2m-fp16-bs1,NLP-belle-7b-2m-fp16-bs4,NLP-belle-7b-2m-ptuning-fp16-bs1,NLP-belle-7b-2m-ptuning-fp16-bs4,NLP-belle-7b-2m-ptuning-fp16-bs1-noprecache,NLP-belle-7b-2m-ptuning-fp16-bs4-noprecache
#FD-llama-7b-fp16-bs4-dy,FD-llama-7b-ptuning-fp16-bs4-dy,FD-llama-7b-ptuning-fp16-bs4-dy-noprecache,FD-chatglm-6b-fp16-bs4-dy,FD-chatglm-6b-ptuning-fp16-bs4-dy,FD-chatglm-6b-ptuning-fp16-bs4-dy-noprecache,FD-belle-7b-2m-fp16-bs4-dy,FD-belle-7b-2m-ptuning-fp16-bs4-dy,FD-belle-7b-2m-ptuning-fp16-bs4-dy-noprecache
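# Naming convention, inferred from the lists above:
#   {NLP|FD}-{model}-[ptuning-]fp16-bs{1|4}[-dy][-noprecache]
# where NLP files are PaddleNLP reference results and FD files are FastDeploy dynamic-batching results.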
@@ -11,8 +11,11 @@


def main():

#Read the install-path environment variables
current_file_path = os.path.abspath(os.getcwd())
py_version = os.environ.get('py_version')
paddlenlp = os.environ.get('paddlenlp')
fastdeploy = os.environ.get('fastdeploy')
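# A minimal fail-fast sketch (assuming run.sh exports exactly these variable names):
#   for var in ('py_version', 'paddlenlp', 'fastdeploy'):
#       if not os.environ.get(var):
#           raise RuntimeError(f"environment variable '{var}' is not set")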

#All programs below are run via absolute paths
inference_model_path = f'{current_file_path}/inference_model' #directory holding the exported inference models
@@ -21,23 +24,7 @@ def main():
out_path = f'{current_file_path}/results.txt'
if os.path.exists(out_path): #if it already exists, remove it; the write below will create a fresh file
os.remove(out_path)
#Download the reference results (the pre-stored NLP tar and the pre-stored FD tar) into pre_result_path
if os.path.exists(pre_result_path):
os.system(command=f"rm -rf {pre_result_path}")
os.mkdir(pre_result_path)
NLP_name = 'paddlenlp_llm_results'
FD_name = 'fastdeploy_llm_dynamic_batching_results'
NLP_url = f'https://bj.bcebos.com/paddle2onnx/third_libs/{NLP_name}.tar'
FD_url = f'https://bj.bcebos.com/paddle2onnx/third_libs/{FD_name}.tar'

wget.download(NLP_url)
wget.download(FD_url)
os.system(command=f"tar -xvf {NLP_name}.tar ")
os.system(command=f"tar -xvf {FD_name}.tar ")
os.system(command=f"mv {NLP_name}/* {pre_result_path}")
os.system(command=f"mv {FD_name}/* {pre_result_path}")
os.system(command=f"rm -f {NLP_name}.tar")
os.system(command=f"rm -f {FD_name}.tar")
#Preparation: export the models
export_model_name = [
'linly-ai/chinese-llama-2-7b', 'THUDM/chatglm-6b',
@@ -51,69 +38,28 @@ def main():
'belle-7b-2m-ptuning-fp16'
]
num_model = len(export_model_name)
-#Absolute paths where the models are stored
+#Set the absolute paths where the models are stored
noptuning_model_path_list = []
ptuning_model_path_list = []
#Non-P-Tuning export and P-Tuning export
#Check whether the model directory already exists
if os.path.exists(inference_model_path):
os.system(command=f"rm -rf {inference_model_path}")
os.mkdir(inference_model_path)
for i in range(num_model):
noptuning_model_path = os.path.join(inference_model_path,
f"{noptuning_model_name[i]}")
ptuning_model_path = os.path.join(inference_model_path,
f"{ptuning_model_name[i]}")
noptuning_model_path_list.append(noptuning_model_path)
ptuning_model_path_list.append(ptuning_model_path)
os.chdir(f"{current_file_path}/PaddleNLP/llm")
#Non-P-Tuning
if not os.path.exists(noptuning_model_path):
os.system(
command=f"python3 export_model.py --model_name_or_path {export_model_name[i]} --output_path {noptuning_model_path} --dtype float16 --inference_model"
)
#P-Tuning
if not os.path.exists(ptuning_model_path):
os.system(
command=f"python3 export_model.py --model_name_or_path {export_model_name[i]} --output_path {ptuning_model_path} --dtype float16 --inference_model --export_precache 1"
)
#Models are exported to PaddleNLP/llm/inference_model/

#Download the precache files
#Create three folders under Fastdeploy/llm, one per model, to hold each model's precache

precache_url = [
'https://bj.bcebos.com/fastdeploy/llm/llama-7b-precache.npy',
'https://bj.bcebos.com/fastdeploy/llm/chatglm-6b-precache.npy',
'https://bj.bcebos.com/fastdeploy/llm/bloom-7b-precache.npy'
]
target_name = 'task_prompt_embeddings.npy'
#Set the model storage paths
precache_path_list = []
for i in range(num_model):
precache_path = f"{current_file_path}/precache_{ptuning_model_name[i]}"
precache_path_list.append(precache_path)
precache_path_FD = os.path.join(precache_path, '8-test', '1')
if os.path.exists(precache_path_FD):
continue
else:
os.system(command=f"mkdir -p {precache_path_FD}")
wget.download(
precache_url[i],
out=os.path.join(precache_path, '8-test', '1', target_name))
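# Resulting layout, one directory per ptuning model, e.g. (path derived from the code above):
#   precache_llama-7b-ptuning-fp16/8-test/1/task_prompt_embeddings.npy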

#Download the test input files
inputs_url = 'https://bj.bcebos.com/paddle2onnx/third_libs/inputs_63.jsonl'
#Set the test input file paths
inputs_name = f'{current_file_path}/inputs_base.jsonl'
inputs_path = inputs_name
if os.path.exists(inputs_path):
os.system(command=f"rm -f {inputs_path}")
wget.download(inputs_url, out=inputs_path)
inputs_PT_url = 'https://bj.bcebos.com/paddle2onnx/third_libs/ptuning_inputs.json'
inputs_PT_name = f'{current_file_path}/inputs_precache.jsonl'
inputs_PT_path = inputs_PT_name
if os.path.exists(inputs_PT_path):
os.system(command=f"rm -f {inputs_PT_path}")
wget.download(inputs_PT_url, out=inputs_PT_path)
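# Note: inputs_base.jsonl drives the non-precache runs below, while inputs_precache.jsonl
# is used only by the ptuning-with-precache runs in the third test loop.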

#Enter Fastdeploy/llm to run the tests
#Results are stored in three lists (only one row per model is kept)
@@ -127,7 +73,7 @@ def main():
#Clear shared memory
os.system(command='rm -rf /dev/shm')
#Create the res directory for storing results; if it already exists, delete its contents
-res_path = f'{current_file_path}/FastDeploy/llm/res'
+res_path = f'{fastdeploy}/llm/res'
if os.path.exists(res_path):
os.system(command=f"rm -f {res_path}/*")
else:
@@ -140,21 +86,17 @@ def main():
os.system(command=f"rm -rf {FD_result_path}")
os.mkdir(FD_result_path)
#Test the non-ptuning models and record the diff rate
# python3 test_serving.py /work/model_pkg/belle-7b-2m-fp16 inputs_63.jsonl 4 1
# python3 read_serving.py res fd_result/llama-6b-fp16-bs1.txt
# python3 print_diff.py nlp_result/chatglm-6b-fp16-bs4.txt fd_result/chatglm-6b-fp16-bs4-dy.txt
# NLP-belle-7b-2m-fp16-bs1

batch_size = [1, 4, 4]
disdy = [1, 1, 0]
mopt = ['NLP', 'NLP', 'FD']
bug_flag = 0
#Three dimensions in total: model name, model type (non-ptuning, ptuning without precache, ptuning with precache), and parameter settings (bs=1, bs=4, bs=4 with dynamic batching)
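# Spelled out (assuming disdy means "disable dynamic batching", which matches the
# column headers written below):
#   i=0: bs=1, disdy=1 -> compared against PaddleNLP results
#   i=1: bs=4, disdy=1 -> compared against PaddleNLP results
#   i=2: bs=4, disdy=0 (dynamic batching) -> compared against FD results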
os.system(
-f'cp {current_file_path}/test_serving.py {current_file_path}/FastDeploy/llm/test_serving.py'
+f'cp {current_file_path}/test_serving.py {fastdeploy}/llm/test_serving.py'
)
os.system(
-f'cp {current_file_path}/read_serving.py {current_file_path}/FastDeploy/llm/read_serving.py'
+f'cp {current_file_path}/read_serving.py {fastdeploy}/llm/read_serving.py'
)

#Write the file header; collect the non-P-Tuning results
@@ -165,14 +107,14 @@ def main():
f.write('%-30s%-30s%-30s%-30s\n' % (
"model", "bs=1(compare with PaddleNLP)",
"bs=4(compare with PaddleNLP)", "bs=4 stop=2(compare with FD)"))
-os.chdir(f"{current_file_path}/FastDeploy/llm")
+os.chdir(f"{fastdeploy}/llm")
for model_index in range(len(noptuning_model_path_list)): #iterate over the model paths
for i in range(3): #iterate over the parameter settings
os.system(
-f"python3 test_serving.py {noptuning_model_path_list[model_index]} {inputs_path} {batch_size[i]} {disdy[i]} 0 0 {res_path}"
+f"{py_version} test_serving.py {noptuning_model_path_list[model_index]} {inputs_path} {batch_size[i]} {disdy[i]} 0 0 {res_path}"
) #the second- and third-from-last arguments indicate ptuning/precache
os.system(
-f"python3 read_serving.py {res_path} {FD_result_path}/{noptuning_model_name[model_index]}-{opts[i]}.txt"
+f"{py_version} read_serving.py {res_path} {FD_result_path}/{noptuning_model_name[model_index]}-{opts[i]}.txt"
)
file1 = os.path.join(
pre_result_path,
@@ -208,10 +150,10 @@ def main():
for model_index in range(len(ptuning_model_path_list)): #iterate over the model names
for i in range(3): #iterate over the parameter settings
os.system(
-f"python3 test_serving.py {ptuning_model_path_list[model_index]} {inputs_path} {batch_size[i]} {disdy[i]} 1 0 {res_path}"
+f"{py_version} test_serving.py {ptuning_model_path_list[model_index]} {inputs_path} {batch_size[i]} {disdy[i]} 1 0 {res_path}"
) #the second- and third-from-last arguments indicate ptuning/precache
os.system(
-f"python3 read_serving.py {res_path} {FD_result_path}/{ptuning_model_name[model_index]}-{opts[i]}-noprecache.txt"
+f"{py_version} read_serving.py {res_path} {FD_result_path}/{ptuning_model_name[model_index]}-{opts[i]}-noprecache.txt"
)
file1 = os.path.join(
pre_result_path,
@@ -236,10 +178,10 @@ def main():
for model_index in range(len(ptuning_model_path_list)): #iterate over the model names
for i in range(3): #iterate over the parameter settings
os.system(
-f"python3 test_serving.py {ptuning_model_path_list[model_index]} {inputs_PT_path} {batch_size[i]} {disdy[i]} 1 1 {res_path} {precache_path_list[model_index]}"
+f"{py_version} test_serving.py {ptuning_model_path_list[model_index]} {inputs_PT_path} {batch_size[i]} {disdy[i]} 1 1 {res_path} {precache_path_list[model_index]}"
) #the second- and third-from-last arguments indicate ptuning/precache
os.system(
-f"python3 read_serving.py {res_path} {FD_result_path}/{ptuning_model_name[model_index]}-{opts[i]}.txt"
+f"{py_version} read_serving.py {res_path} {FD_result_path}/{ptuning_model_name[model_index]}-{opts[i]}.txt"
)
file1 = os.path.join(
pre_result_path,
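Each comparison above pairs a pre-stored reference file (file1) with a freshly generated FD result file and records a diff rate. print_diff.py itself is not included in this diff, so the following is only a sketch of such a line-level comparison, with the function name and semantics assumed:

def diff_rate(ref_path, new_path):
    # Read both result files and compare them line by line; the rate is the
    # fraction of positions that disagree (length differences count as mismatches).
    with open(ref_path) as ref, open(new_path) as new:
        ref_lines = ref.read().splitlines()
        new_lines = new.read().splitlines()
    total = max(len(ref_lines), len(new_lines)) or 1
    mismatches = sum(a != b for a, b in zip(ref_lines, new_lines))
    mismatches += abs(len(ref_lines) - len(new_lines))
    return mismatches / total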
65 changes: 44 additions & 21 deletions llm/test/run.sh
@@ -1,28 +1,51 @@
#!/bin/bash

current_directory=$PWD
pip uninstall -y paddlepaddle-gpu
pip uninstall -y paddlenlp
unset http_proxy
unset https_proxy
wget https://bj.bcebos.com/fastdeploy/llm/paddlepaddle_gpu-0.0.0-cp38-cp38-linux_x86_64.whl
pip install paddlepaddle_gpu-0.0.0-cp38-cp38-linux_x86_64.whl
export https_proxy=http://172.19.56.199:3128
export http_proxy=http://172.19.56.199:3128
git clone https://github.com/PaddlePaddle/PaddleNLP.git
git clone -b llm https://github.com/PaddlePaddle/FastDeploy.git
pip install wget
unset http_proxy
unset https_proxy
cd PaddleNLP
python3 setup.py bdist_wheel
cd dist
pip install $(ls)
cd ..
cd csrc
python3 setup_cuda.py install --user

#Environment setup: mainly installs the paddlenlp custom ops
cd ${paddlenlp}/csrc
${py_version} setup_cuda.py install --user



#Download the model files
cd $current_directory
#Download and extract the pre-stored reference results
NLP_name='paddlenlp_llm_results'
FD_name='fastdeploy_llm_dynamic_batching_results'
wget https://bj.bcebos.com/paddle2onnx/third_libs/${NLP_name}.tar
wget https://bj.bcebos.com/paddle2onnx/third_libs/${FD_name}.tar
tar -xvf ${NLP_name}.tar
tar -xvf ${FD_name}.tar
mkdir pre_result
mv ${NLP_name}/* pre_result/
mv ${FD_name}/* pre_result/
rm -f ${NLP_name}.tar
rm -f ${FD_name}.tar
#Download the test input files
wget -O inputs_base.jsonl https://bj.bcebos.com/paddle2onnx/third_libs/inputs_63.jsonl
wget -O inputs_precache.jsonl https://bj.bcebos.com/paddle2onnx/third_libs/ptuning_inputs.json
#Download the precache files and export the static models
export_model_name=('linly-ai/chinese-llama-2-7b' 'THUDM/chatglm-6b' 'bellegroup/belle-7b-2m')
precache_url=('https://bj.bcebos.com/fastdeploy/llm/llama-7b-precache.npy' 'https://bj.bcebos.com/fastdeploy/llm/chatglm-6b-precache.npy' 'https://bj.bcebos.com/fastdeploy/llm/bloom-7b-precache.npy')
noptuning_model_name=('llama-7b-fp16' 'chatglm-6b-fp16' 'belle-7b-2m-fp16')
ptuning_model_name=('llama-7b-ptuning-fp16' 'chatglm-6b-ptuning-fp16' 'belle-7b-2m-ptuning-fp16')
target_name='task_prompt_embeddings.npy'
for((i=0;i<${#precache_url[*]};i++));do
mkdir -p precache_${ptuning_model_name[i]}/8-test/1
cd precache_${ptuning_model_name[i]}/8-test/1
wget -O ${target_name} ${precache_url[i]}
cd $current_directory
done
mkdir inference_model
cd ${paddlenlp}/llm
for((i=0;i<${#export_model_name[*]};i++));do
${py_version} export_model.py --model_name_or_path ${export_model_name[i]} --output_path ${current_directory}/inference_model/${noptuning_model_name[i]} --dtype float16 --inference_model
${py_version} export_model.py --model_name_or_path ${export_model_name[i]} --output_path ${current_directory}/inference_model/${ptuning_model_name[i]} --dtype float16 --inference_model --export_precache 1
done
cd $current_directory
-python3 -u ci.py
+#Start the tests
+${py_version} -u ci.py
result=$?
if [ $result -eq 0 ];then
echo "tests passed"
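run.sh exports the variables that ci.py later reads back via os.environ. For illustration only, an equivalent direct launch of ci.py from Python; every value below is an assumed placeholder, not part of this PR:

import os
import subprocess

# Assumed placeholder values; the real CI pipeline sets these in run.sh.
env = dict(os.environ,
           py_version='python3.8',
           paddlenlp='/work/PaddleNLP',
           fastdeploy='/work/FastDeploy')
subprocess.run(['python3', '-u', 'ci.py'], env=env, check=True)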