From e7d67410bcc8c6c9db86bd7fad0029f961b55d4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=AA=91=E9=A9=AC=E5=B0=8F=E7=8C=AB?= <1435130236@qq.com>
Date: Fri, 8 Dec 2023 13:21:41 +0800
Subject: [PATCH] open ci testing (#7606)

---
 tests/llm/test_predictor.py | 27 ++++++++++++++++++---------
 tests/llm/testing_utils.py  |  7 ++-----
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/tests/llm/test_predictor.py b/tests/llm/test_predictor.py
index 349de42a5097..6878d373e5cb 100644
--- a/tests/llm/test_predictor.py
+++ b/tests/llm/test_predictor.py
@@ -22,6 +22,8 @@
 from paddlenlp.transformers import (  # ChatGLMForCausalLM,
     AutoTokenizer,
     BloomForCausalLM,
+    ChatGLMForCausalLM,
+    ChatGLMv2ForCausalLM,
     LlamaForCausalLM,
 )
 from paddlenlp.utils.downloader import (
@@ -38,9 +40,8 @@
     [
         ["__internal_testing__/tiny-random-llama", LlamaForCausalLM],
         ["__internal_testing__/tiny-fused-bloom", BloomForCausalLM],
-        # ["__internal_testing__/tiny-fused-chatglm", ChatGLMForCausalLM],
-        # TODO(wj-Mcat): disable chatglm2 test temporarily
-        # ["__internal_testing__/tiny-fused-chatglm2", ChatGLMv2ForCausalLM],
+        ["__internal_testing__/tiny-fused-chatglm", ChatGLMForCausalLM],
+        ["__internal_testing__/tiny-fused-chatglm2", ChatGLMv2ForCausalLM],
     ],
 )
 class PredictorTest(LLMTest, unittest.TestCase):
@@ -69,8 +70,12 @@ def test_predictor(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.25)
-        # self.assertGreaterEqual(count / len(result_0), 0.4)
+        self.assertGreaterEqual(full_match / len(result_0), 0.25)
+
+        if self.model_name_or_path == "__internal_testing__/tiny-fused-chatglm":
+            self.assertGreaterEqual(count / len(result_0), 0.3)
+        else:
+            self.assertGreaterEqual(count / len(result_0), 0.4)
 
     def test_wint8(self):
         self.run_predictor({"inference_model": True, "quant_type": "weight_only_int8"})
@@ -86,8 +91,12 @@ def test_wint8(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.1)
-        # self.assertGreater(count / len(result_0), 0.4)
+        self.assertGreaterEqual(full_match / len(result_0), 0.1)
+
+        if self.model_name_or_path == "__internal_testing__/tiny-fused-chatglm":
+            self.assertGreaterEqual(count / len(result_0), 0.3)
+        else:
+            self.assertGreaterEqual(count / len(result_0), 0.4)
 
 
 @parameterized_class(
@@ -134,5 +143,5 @@ def test_predictor(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.6)
-        # self.assertGreaterEqual(count / len(result_0), 0.8)
+        self.assertGreaterEqual(full_match / len(result_0), 0.6)
+        self.assertGreaterEqual(count / len(result_0), 0.8)
diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py
index 6742f0b4bca1..583e5479549f 100644
--- a/tests/llm/testing_utils.py
+++ b/tests/llm/testing_utils.py
@@ -106,8 +106,5 @@ def run_predictor(self, config_params=None):
         infer_result = self._read_result(config["output_file"])
         assert len(predict_result) == len(infer_result)
 
-        if not config_params.get("inference_model", False):
-            # TODO(wj-Mcat): https://github.com/PaddlePaddle/PaddleNLP/pull/7496
-            # do testing under no inference-model
-            for predict_item, infer_item in zip(predict_result, infer_result):
-                self.assertEqual(predict_item, infer_item)
+        for predict_item, infer_item in zip(predict_result, infer_result):
+            self.assertEqual(predict_item, infer_item)
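-- 
Running the re-enabled cases locally (a minimal sketch, not part of the patch
itself; it assumes a PaddleNLP development checkout run from the repository
root, so that tests/llm is discoverable -- CI may invoke the suite differently):

    # run_llm_predictor_tests.py (hypothetical helper script)
    import unittest

    # Discover the predictor tests, including the ChatGLM / ChatGLMv2
    # parameterizations re-enabled above, and run them with verbose output.
    suite = unittest.defaultTestLoader.discover("tests/llm", pattern="test_predictor.py")
    unittest.TextTestRunner(verbosity=2).run(suite)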