From e7d67410bcc8c6c9db86bd7fad0029f961b55d4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=AA=91=E9=A9=AC=E5=B0=8F=E7=8C=AB?= <1435130236@qq.com>
Date: Fri, 8 Dec 2023 13:21:41 +0800
Subject: [PATCH] open ci testing (#7606)

---
 tests/llm/test_predictor.py | 27 ++++++++++++++++++---------
 tests/llm/testing_utils.py  |  7 ++-----
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/tests/llm/test_predictor.py b/tests/llm/test_predictor.py
index 349de42a5097..6878d373e5cb 100644
--- a/tests/llm/test_predictor.py
+++ b/tests/llm/test_predictor.py
@@ -22,6 +22,8 @@
 from paddlenlp.transformers import (  # ChatGLMForCausalLM,
     AutoTokenizer,
     BloomForCausalLM,
+    ChatGLMForCausalLM,
+    ChatGLMv2ForCausalLM,
     LlamaForCausalLM,
 )
 from paddlenlp.utils.downloader import (
@@ -38,9 +40,8 @@
     [
         ["__internal_testing__/tiny-random-llama", LlamaForCausalLM],
         ["__internal_testing__/tiny-fused-bloom", BloomForCausalLM],
-        # ["__internal_testing__/tiny-fused-chatglm", ChatGLMForCausalLM],
-        # TODO(wj-Mcat): disable chatglm2 test temporarily
-        # ["__internal_testing__/tiny-fused-chatglm2", ChatGLMv2ForCausalLM],
+        ["__internal_testing__/tiny-fused-chatglm", ChatGLMForCausalLM],
+        ["__internal_testing__/tiny-fused-chatglm2", ChatGLMv2ForCausalLM],
     ],
 )
 class PredictorTest(LLMTest, unittest.TestCase):
@@ -69,8 +70,12 @@ def test_predictor(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.25)
-        # self.assertGreaterEqual(count / len(result_0), 0.4)
+        self.assertGreaterEqual(full_match / len(result_0), 0.25)
+
+        if self.model_name_or_path == "__internal_testing__/tiny-fused-chatglm":
+            self.assertGreaterEqual(count / len(result_0), 0.3)
+        else:
+            self.assertGreaterEqual(count / len(result_0), 0.4)
 
     def test_wint8(self):
         self.run_predictor({"inference_model": True, "quant_type": "weight_only_int8"})
@@ -86,8 +91,12 @@ def test_wint8(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.1)
-        # self.assertGreater(count / len(result_0), 0.4)
+        self.assertGreaterEqual(full_match / len(result_0), 0.1)
+
+        if self.model_name_or_path == "__internal_testing__/tiny-fused-chatglm":
+            self.assertGreaterEqual(count / len(result_0), 0.3)
+        else:
+            self.assertGreaterEqual(count / len(result_0), 0.4)
 
 
 @parameterized_class(
@@ -134,5 +143,5 @@ def test_predictor(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.6)
-        # self.assertGreaterEqual(count / len(result_0), 0.8)
+        self.assertGreaterEqual(full_match / len(result_0), 0.6)
+        self.assertGreaterEqual(count / len(result_0), 0.8)
diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py
index 6742f0b4bca1..583e5479549f 100644
--- a/tests/llm/testing_utils.py
+++ b/tests/llm/testing_utils.py
@@ -106,8 +106,5 @@ def run_predictor(self, config_params=None):
         infer_result = self._read_result(config["output_file"])
         assert len(predict_result) == len(infer_result)
 
-        if not config_params.get("inference_model", False):
-            # TODO(wj-Mcat): https://github.com/PaddlePaddle/PaddleNLP/pull/7496
-            # do testing under no inference-model
-            for predict_item, infer_item in zip(predict_result, infer_result):
-                self.assertEqual(predict_item, infer_item)
+        for predict_item, infer_item in zip(predict_result, infer_result):
+            self.assertEqual(predict_item, infer_item)
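-- 
Running the re-enabled cases locally (a minimal sketch, not part of the patch
itself; it assumes a PaddleNLP development checkout run from the repository
root, so that tests/llm is discoverable -- CI may invoke the suite differently):

    # run_llm_predictor_tests.py (hypothetical helper script)
    import unittest

    # Discover the predictor tests, including the ChatGLM / ChatGLMv2
    # parameterizations re-enabled above, and run them with verbose output.
    suite = unittest.defaultTestLoader.discover("tests/llm", pattern="test_predictor.py")
    unittest.TextTestRunner(verbosity=2).run(suite)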