From 338fe2afec30aed153c656a7acc9d5ff56a10408 Mon Sep 17 00:00:00 2001
From: Jack Zhou
Date: Sat, 17 Sep 2022 00:55:08 +0800
Subject: [PATCH] Add use_faster flag for uie of taskflow. (#3194)

* Add use_faster flag for taskflow

* Add empty line

* Add doc of uie

* remove faster_tokenizer tmp

* merge
---
 model_zoo/uie/README.md                      |  5 +++--
 paddlenlp/taskflow/information_extraction.py |  8 ++++++--
 paddlenlp/transformers/auto/tokenizer.py     | 13 ++++++++++++-
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/model_zoo/uie/README.md b/model_zoo/uie/README.md
index 7dd54b7ded62..7bb3530da9f0 100644
--- a/model_zoo/uie/README.md
+++ b/model_zoo/uie/README.md
@@ -513,7 +513,8 @@ UIE is not limited to a particular industry or set of extraction targets; below
         batch_size=1,
         model='uie-base',
         position_prob=0.5,
-        precision='fp32')
+        precision='fp32',
+        use_faster=False)
 ```
 
 * `schema`: Defines the extraction targets of the task; see the usage examples of the different out-of-the-box tasks for how to configure it.
@@ -522,7 +523,7 @@
 * `model`: Selects the model used by the task. Defaults to `uie-base`; the options are `uie-base`, `uie-medium`, `uie-mini`, `uie-micro`, `uie-nano`, `uie-medical-base` and `uie-base-en`.
 * `position_prob`: The probability the model assigns to a span's start/end position lies between 0 and 1; results below this threshold are dropped from the output. Defaults to 0.5. A span's final probability is the product of its start-position and end-position probabilities.
 * `precision`: Selects the model precision. Defaults to `fp32`; the options are `fp16` and `fp32`. `fp16` inference is faster. If you choose `fp16`, first make sure the machine has the correct NVIDIA drivers and base software installed, with **CUDA>=11.2 and cuDNN>=8.1.1**; on first use, install the required dependencies as prompted. Also make sure the GPU's CUDA Compute Capability is greater than 7.0; typical devices include the V100, T4, A10, A100, and the GTX 20 and 30 series cards. For more on CUDA Compute Capability and precision support, see the NVIDIA documentation: [GPU hardware and supported precision matrix](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/support-matrix/index.html#hardware-precision-matrix).
-
+* `use_faster`: Uses FasterTokenizer, a high-performance tokenization operator implemented in C++, to speed up text preprocessing. Install the FasterTokenizer library with `pip install faster_tokenizer` before enabling it. Defaults to `False`. See the [FasterTokenizer documentation](../../faster_tokenizer) for more details.
 
 ## 4. Training Customization
 
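The new flag simply threads through `Taskflow`'s keyword arguments, as documented above. A minimal usage sketch, assuming `paddlenlp` and the `faster_tokenizer` package are installed; the schema and input text here are illustrative only:

```python
from paddlenlp import Taskflow

# Illustrative schema; any UIE schema works the same way.
schema = ['time', 'player', 'event name']

# use_faster=True swaps in the C++ FasterTokenizer for text preprocessing.
# It requires `pip install faster_tokenizer` and defaults to False.
ie = Taskflow('information_extraction',
              schema=schema,
              model='uie-base-en',
              use_faster=True)

print(ie('Gu Ailing won the gold medal with 188.25 points '
         'in the freeski big air final on February 8.'))
```
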
diff --git a/paddlenlp/taskflow/information_extraction.py b/paddlenlp/taskflow/information_extraction.py
index a7cb72611a90..af6d4b9d4c75 100755
--- a/paddlenlp/taskflow/information_extraction.py
+++ b/paddlenlp/taskflow/information_extraction.py
@@ -356,7 +356,6 @@ def __init__(self, task, model, schema, schema_lang="zh", **kwargs):
         self._schema_tree = None
         self.set_schema(schema)
         self._check_task_files()
-        self._construct_tokenizer()
         self._check_predictor_type()
         self._get_inference_model()
         self._usage = usage
@@ -374,6 +373,9 @@ def __init__(self, task, model, schema, schema_lang="zh", **kwargs):
             'lazy_load'] if 'lazy_load' in self.kwargs else False
         self._num_workers = self.kwargs[
             'num_workers'] if 'num_workers' in self.kwargs else 0
+        self.use_faster = self.kwargs[
+            'use_faster'] if 'use_faster' in self.kwargs else False
+        self._construct_tokenizer()
 
     def set_schema(self, schema):
         if isinstance(schema, dict) or isinstance(schema, str):
@@ -424,7 +426,8 @@ def _construct_tokenizer(self):
         """
         Construct the tokenizer for the predictor.
         """
-        self._tokenizer = AutoTokenizer.from_pretrained(self._task_path)
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self._task_path, use_faster=self.use_faster)
 
     def _preprocess(self, inputs):
         """
@@ -880,6 +883,7 @@ def _construct_tokenizer(self):
         """
         Construct the tokenizer for the predictor.
         """
+        # TODO(zhoushunjie): Will set use_faster=True in future.
         self._tokenizer = AutoTokenizer.from_pretrained(self._task_path)
 
     def _preprocess(self, inputs):

diff --git a/paddlenlp/transformers/auto/tokenizer.py b/paddlenlp/transformers/auto/tokenizer.py
index 7ff7cfa15f30..5b462b7065dd 100644
--- a/paddlenlp/transformers/auto/tokenizer.py
+++ b/paddlenlp/transformers/auto/tokenizer.py
@@ -123,6 +123,7 @@ class AutoTokenizer():
     MAPPING_NAMES = get_configurations()
     _tokenizer_mapping = MAPPING_NAMES
     _name_mapping = TOKENIZER_MAPPING_NAMES
+    _faster_name_mapping = FASTER_TOKENIZER_MAPPING_NAMES
     tokenizer_config_file = "tokenizer_config.json"
 
     def __init__(self, *args, **kwargs):
@@ -183,7 +184,6 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
         for names, tokenizer_class in cls._tokenizer_mapping.items():
             for name in names:
                 all_tokenizer_names.append(name)
-
         # From built-in pretrained models
         if pretrained_model_name_or_path in all_tokenizer_names:
             for names, tokenizer_classes in cls._tokenizer_mapping.items():
@@ -234,11 +234,22 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
                 init_class = init_kwargs.pop("init_class", None)
                 if init_class is None:
                     init_class = init_kwargs.pop("tokenizer_class", None)
+
                 if init_class:
                     class_name = cls._name_mapping[init_class]
                     import_class = importlib.import_module(
                         f"paddlenlp.transformers.{class_name}.tokenizer")
                     tokenizer_class = getattr(import_class, init_class)
+                    if use_faster:
+                        for faster_tokenizer_class, name in cls._faster_name_mapping.items(
+                        ):
+                            if name == class_name:
+                                import_class = importlib.import_module(
+                                    f"paddlenlp.transformers.{class_name}.faster_tokenizer"
+                                )
+                                tokenizer_class = getattr(
+                                    import_class, faster_tokenizer_class)
+                                break
                     logger.info(
                         "We are using %s to load '%s'." %
                         (tokenizer_class, pretrained_model_name_or_path))
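
The `use_faster` dispatch added to `AutoTokenizer.from_pretrained` above is a mapping-plus-`importlib` lookup. A standalone sketch of the same pattern, with hypothetical mapping entries standing in for `FASTER_TOKENIZER_MAPPING_NAMES` (not PaddleNLP's actual table):

```python
import importlib

# Hypothetical entries mirroring the shape of FASTER_TOKENIZER_MAPPING_NAMES:
# faster-tokenizer class name -> model module name.
FASTER_NAME_MAPPING = {
    "BertFasterTokenizer": "bert",
    "ErnieFasterTokenizer": "ernie",
}


def resolve_tokenizer_class(init_class, class_name, use_faster=False):
    """Load the Python tokenizer class, swapping in the C++-backed
    faster variant when one is registered for this model."""
    module = importlib.import_module(
        f"paddlenlp.transformers.{class_name}.tokenizer")
    tokenizer_class = getattr(module, init_class)
    if use_faster:
        for faster_class, name in FASTER_NAME_MAPPING.items():
            if name == class_name:
                # The faster tokenizer lives in a sibling module.
                module = importlib.import_module(
                    f"paddlenlp.transformers.{class_name}.faster_tokenizer")
                tokenizer_class = getattr(module, faster_class)
                break
    return tokenizer_class
```

Because the lookup falls through silently when no faster variant is registered, passing `use_faster=True` for an unsupported model simply keeps the regular Python tokenizer.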