Commit

open ci testing (#7606)
wj-Mcat committed Dec 8, 2023
1 parent 9c279f7 commit e7d6741
Showing 2 changed files with 20 additions and 14 deletions.
27 changes: 18 additions & 9 deletions tests/llm/test_predictor.py
@@ -22,6 +22,8 @@
 from paddlenlp.transformers import ( # ChatGLMForCausalLM,
     AutoTokenizer,
     BloomForCausalLM,
+    ChatGLMForCausalLM,
+    ChatGLMv2ForCausalLM,
     LlamaForCausalLM,
 )
 from paddlenlp.utils.downloader import (
@@ -38,9 +40,8 @@
     [
         ["__internal_testing__/tiny-random-llama", LlamaForCausalLM],
         ["__internal_testing__/tiny-fused-bloom", BloomForCausalLM],
-        # ["__internal_testing__/tiny-fused-chatglm", ChatGLMForCausalLM],
-        # TODO(wj-Mcat): disable chatglm2 test temporarily
-        # ["__internal_testing__/tiny-fused-chatglm2", ChatGLMv2ForCausalLM],
+        ["__internal_testing__/tiny-fused-chatglm", ChatGLMForCausalLM],
+        ["__internal_testing__/tiny-fused-chatglm2", ChatGLMv2ForCausalLM],
     ],
 )
 class PredictorTest(LLMTest, unittest.TestCase):
@@ -69,8 +70,12 @@ def test_predictor(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.25)
-        # self.assertGreaterEqual(count / len(result_0), 0.4)
+        self.assertGreaterEqual(full_match / len(result_0), 0.25)
+
+        if self.model_name_or_path == "__internal_testing__/tiny-fused-chatglm":
+            self.assertGreaterEqual(count / len(result_0), 0.3)
+        else:
+            self.assertGreaterEqual(count / len(result_0), 0.4)
 
     def test_wint8(self):
         self.run_predictor({"inference_model": True, "quant_type": "weight_only_int8"})
@@ -86,8 +91,12 @@ def test_wint8(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.1)
-        # self.assertGreater(count / len(result_0), 0.4)
+        self.assertGreaterEqual(full_match / len(result_0), 0.1)
+
+        if self.model_name_or_path == "__internal_testing__/tiny-fused-chatglm":
+            self.assertGreaterEqual(count / len(result_0), 0.3)
+        else:
+            self.assertGreaterEqual(count / len(result_0), 0.4)
 
 
 @parameterized_class(
@@ -134,5 +143,5 @@ def test_predictor(self):
             count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
 
-        # self.assertGreaterEqual(full_match / len(result_0), 0.6)
-        # self.assertGreaterEqual(count / len(result_0), 0.8)
+        self.assertGreaterEqual(full_match / len(result_0), 0.6)
+        self.assertGreaterEqual(count / len(result_0), 0.8)
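
For context, the assertions restored above gate on two prefix-match ratios between the outputs produced with and without the fused inference model. A minimal sketch of how those ratios are computed, assuming result_0 and result_1 are the two lists of decoded strings (the helper name and the min_length computation are illustrative, not lifted from the test):

def prefix_match_ratios(result_0, result_1):
    # Ratio of pairs whose first half-prefix matches, and whose full common prefix matches.
    count = 0
    full_match = 0
    for inference_item, no_inference_item in zip(result_0, result_1):
        # the common prefix length is assumed to be the shorter of the two outputs
        min_length = min(len(inference_item), len(no_inference_item))
        count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
        full_match += int(inference_item[:min_length] == no_inference_item[:min_length])
    return count / len(result_0), full_match / len(result_0)

With this framing, the commit restores the full-prefix bound (0.25, or 0.1 for weight-only int8) and relaxes the half-prefix bound to 0.3 for the tiny-fused-chatglm checkpoint only, keeping 0.4 for the other models.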
7 changes: 2 additions & 5 deletions tests/llm/testing_utils.py
@@ -106,8 +106,5 @@ def run_predictor(self, config_params=None):
         infer_result = self._read_result(config["output_file"])
         assert len(predict_result) == len(infer_result)
 
-        if not config_params.get("inference_model", False):
-            # TODO(wj-Mcat): https://github.com/PaddlePaddle/PaddleNLP/pull/7496
-            # do testing under no inference-model
-            for predict_item, infer_item in zip(predict_result, infer_result):
-                self.assertEqual(predict_item, infer_item)
+        for predict_item, infer_item in zip(predict_result, infer_result):
+            self.assertEqual(predict_item, infer_item)
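
As background on how the re-enabled ChatGLM entries fan out into test cases: the value list shown in the first hunk is fed to @parameterized_class, which generates one test class per row and binds each row to class attributes such as model_name_or_path. A rough sketch of that pattern, assuming the parameterized package provides the decorator used here and trimming the attribute list down to the one attribute the new threshold branch reads (class and test names are hypothetical):

import unittest

from parameterized import parameterized_class


@parameterized_class(
    ["model_name_or_path"],
    [
        ["__internal_testing__/tiny-fused-chatglm"],
        ["__internal_testing__/tiny-fused-chatglm2"],
    ],
)
class ExamplePredictorTest(unittest.TestCase):
    def test_threshold_selection(self):
        # mirrors the branch restored above: only tiny-fused-chatglm gets the relaxed 0.3 bound
        threshold = 0.3 if self.model_name_or_path == "__internal_testing__/tiny-fused-chatglm" else 0.4
        self.assertIn(threshold, (0.3, 0.4))

Because each row becomes its own generated TestCase subclass, the relaxed bound only applies to the class whose model_name_or_path is the tiny-fused-chatglm checkpoint; the llama, bloom, and chatglm2 classes keep the original 0.4 threshold.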
