From 4c77a9086c488a9a0b11d4e7f0c406c31716345e Mon Sep 17 00:00:00 2001 From: Baibaifan <39549453+Baibaifan@users.noreply.github.com> Date: Fri, 14 Jan 2022 15:38:49 +0800 Subject: [PATCH 01/10] Add dygraph sharding stage3 (#38052) --- paddle/pten/core/dense_tensor.cc | 4 + .../meta_parallel/sharding/sharding_stage3.py | 675 ++++++++++++++++++ .../meta_parallel/sharding/sharding_utils.py | 31 +- .../fluid/tests/unittests/CMakeLists.txt | 3 + .../unittests/dygraph_sharding_stage3.py | 233 ++++++ .../unittests/test_dygraph_sharding_stage3.py | 31 + 6 files changed, 960 insertions(+), 17 deletions(-) create mode 100644 python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py create mode 100644 python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py create mode 100644 python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 0b5f5cb18e13d..eb6f834d72779 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -435,6 +435,10 @@ inline T* DenseTensor::mutable_data(const paddle::platform::Place& place, } void DenseTensor::ShareBufferWith(const DenseTensor& tensor) { + if (storage_ == nullptr) { + storage_ = make_intrusive( + paddle::platform::CPUPlace()); + } if (storage_ != nullptr && tensor.storage_ != nullptr) { storage_->set_data_shared(tensor.storage_->data_shared()); } diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py new file mode 100644 index 0000000000000..e5d04aac1551e --- /dev/null +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py @@ -0,0 +1,675 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import copy +import time +import contextlib +import logging +import functools +import numpy as np +from itertools import chain +from functools import reduce +from types import MethodType +from collections import deque, OrderedDict + +import paddle +from paddle import nn +from paddle.autograd import PyLayer +import paddle.fluid.core as core +import paddle.distributed as dist +from paddle.fluid.framework import ParamBase +from paddle.fluid.clip import ClipGradByGlobalNorm +from paddle.distributed.collective import _get_global_group + +from .sharding_utils import Type, ShardingClipGrad +from ..pp_utils.utils import _all_gather + +# CUDA alignment 256 bytes +alignment = {"gpu": 256, } +align = { + Type.fp16.value: 2, + Type.fp32.value: 4, +} + +global CHECK_LAYER +CHECK_LAYER = dict() # Help to check layer's id -> layer's name + + +class ShardingStage3(nn.Layer): + """ + A wrapper for Sharding Stage3 Layer in Dygraph. + + .. warning: ShardingStage3 encapsulates the layer strategy and integrates it into the nn.Layer. + + .. ZeRO: https://arxiv.org/pdf/1910.02054.pdf. 
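
    Examples:
        A minimal usage sketch under a collective multi-GPU launch, mirroring
        this PR's unit test. ``MLP``, the optimizer settings and the random
        input are illustrative placeholders, not part of this API:

        .. code-block:: python

            # launched with: python -m paddle.distributed.launch --gpus=0,1 train.py
            import paddle
            from paddle.distributed import fleet
            from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage3 import ShardingStage3

            fleet.init(is_collective=True)

            model = MLP()  # any paddle.nn.Layer; MLP is a placeholder
            optimizer = paddle.optimizer.AdamW(
                learning_rate=0.001, parameters=model.parameters())

            # Parameters are partitioned across the ranks of the group, and
            # optimizer.step()/clear_grad() are redefined to update only the
            # local parameter slices.
            model = ShardingStage3(model, optimizer=optimizer)

            img = paddle.rand([32, 1000])
            out = model(img)
            loss = paddle.mean(out)
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()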
+ """ + + def __init__(self, + layer, + optimizer, + group=None, + sync_buffers=False, + device="gpu", + pertrain_sync_models=True, + accumulate_grads=False, + offload=False, + sync_comm=False): + super().__init__() + + # Default configs + assert core.is_compiled_with_cuda(), "Only support CUDA." + self._layer = layer + self._default_device = device + self.__sync_buffers = sync_buffers + self._accumulate_grads = accumulate_grads + self._offload = offload + self._sync_comm = sync_comm + + # Communication group establishment + self._group = dist.new_group(_get_global_group() + .ranks) if group is None else group + self._world_size_scaling = 1.0 / self._group.nranks + assert self._group.nranks > 1, "Training must be distributed, ranks must be greater than 1." + self._rank = self._group.rank + self._global_root_rank = 0 # picking rank 0 as the reference + self._global_ranks = self._group.ranks + self._param2buffer_size = dict() # {param.name: size} + self._param2buffer = dict( + ) # {param.name: [(start0, end0),(start1, end1), ...]} + self._trainable_params = dict() # {layer.name: [trainable_params]} + + assert not isinstance( + optimizer, list), "Multiple optimizers are not supported now." + self._optim = _OptimizerWrapper(optimizer, self._offload, self._group, + self._update_params_slice) + self._ori_parameter_list = self._optim._parameter_list + self._ori_param_groups = self._optim._param_groups + + # Replace optimizer's _grad_clip + if isinstance(self._optim._grad_clip, ClipGradByGlobalNorm): + logging.warning( + "While using ClipGradByGlobalNorm in ShardingStage3, the grad clip of original optimizer will be changed." + ) + self._optim._grad_clip = ShardingClipGrad(self._optim._grad_clip, + paddle.get_device(), + self._group) + + # Synchronous all ranks models + if pertrain_sync_models: + self._sync_params_and_buffers() + + self._segment_rank_params(self._layer) + + # In the first step, record the execution order of the layer + self._order_tracer = OrderedDict() + self._order_tracer["order"] = 0 + self._order_tracer["layer"] = [] + # Register task flow + self._task_flow = TaskFlow() + # Register forward hooks + self._register_forward_hooks(self._layer) + # Register backward parameter hooks + self._register_backward_hooks() + # Redefine optimizer step and clear function + self._redefine_opt_step() + self._redefine_opt_clear() + + @paddle.no_grad() + def _sync_params_and_buffers(self): + """ + Sync all model states for all ranks + """ + + for p in self._layer.parameters(): + dist.broadcast( + p, + src=self._global_root_rank, + group=self._group, + use_calc_stream=True) + + # Multi stream operation will be supported later + dist.wait(tensor=p, group=self._group, use_calc_stream=True) + + def _clear_gradients(self): + assert len(self._trainable_params.keys()) > 0 + current_layer_params = self._layer.parameters(include_sublayers=True) + trainable_params = list( + filter(lambda x: x.trainable, current_layer_params)) + for param in trainable_params: + assert hasattr( + param, "fw_storage" + ), "Find {} don't have fw_storage attribute.".format(param.name) + + # param.bw_storage.zero_() + param.fw_storage.clear_gradient(False) + param.fw_storage._gradient_set_empty(False) + param.bw_storage._clear() + + # Update param memery slice + def _update_params_slice(self): + update_list = self._update_params() + + if not isinstance(self._optim._param_groups[0], dict): + slice_params = [param.fw_storage for param in update_list] + self._optim._parameter_list = slice_params + self._optim._param_groups = 
slice_params + else: + params_name_list = list(map(lambda p: p.name, update_list)) + for param_group in self._optim._param_groups: + slice_p = [] + for p in param_group['params']: + if p.name in params_name_list: + assert hasattr( + p, "fw_storage" + ), "Find {} don't have fw_storage attribute.".format( + p.name) + slice_p.append(p.fw_storage) + param_group['params'] = slice_p + + def forward(self, *inputs, **kwargs): + """ + A wrapper for Sharding Stage3 layer. + """ + # 1.Sync layer's buffers state + if self.__sync_buffers: + self._sync_buffers() + + # 2.Normal FW on the base model + fw = self._layer(*inputs, **kwargs) + + return fw + + def _segment_rank_params(self, layer, name="last_layer"): + current_layer_params = _current_layer_params(layer) + if current_layer_params: + CHECK_LAYER[id(layer)] = name + self._flatten_layer_params(layer, current_layer_params) + + for name, sub_layer in layer.named_children(): + self._segment_rank_params(sub_layer, name) + + def _flatten_layer_params(self, layer, current_layer_params): + def _add_manage_info(trainable_param): + return _PartitionParam(trainable_param) + + trainable_params = list( + filter(lambda x: x.trainable, current_layer_params)) + assert id(layer) not in self._trainable_params.keys() + self._trainable_params[id(layer)] = list( + map(_add_manage_info, trainable_params)) + + for param in self._trainable_params[id(layer)]: + if param.name in self._param2buffer.keys(): + continue + self._param2buffer[param.name] = [] + # 1.Params alignment + offset = 0 + # CUDA alignment 256 bytes + size = param._numel() * align[param.dtype] + remaining = size % alignment[self._default_device] + ali = 0 if remaining == 0 else alignment[ + self._default_device] - remaining + align_ = ali // align[param.dtype] + + offset = align_ + param._numel() + buffer_size = offset if offset % self._group.nranks == 0 else offset + self._group.nranks - ( + offset % self._group.nranks) + self._param2buffer_size[param.name] = buffer_size + + # 2.Combination param buffer + assert buffer_size % self._group.nranks == 0 + pre_buffer = buffer_size // self._group.nranks + + for rank_ in range(self._group.nranks): + self._param2buffer[param.name].append( + (rank_ * pre_buffer, (rank_ + 1) * pre_buffer)) + + # 3.Flatten layer params and release other rank buffer + self._param_storage(param, buffer_size) + + def _param_storage(self, param, buffer_size): + assert isinstance(buffer_size, int) + value = np.zeros( + buffer_size, + dtype=np.float16) if Type.fp16.value == param.dtype else np.zeros( + buffer_size, dtype=np.float32) + buffer = core.VarBase(value=value, place=core.CPUPlace()) + + param_shape = param.shape + origin_state = param.stop_gradient + param.stop_gradient = True + param.flatten_() + param.stop_gradient = origin_state + start, end = self._param2buffer[param.name][self._rank] + + # Copy the current param value + tmp_var = core.VarBase( + tensor=buffer._slice(0, param._numel()), place=core.CPUPlace()) + param_cpu = param.cpu() + tmp_var.value().get_tensor().set(param_cpu.value().get_tensor(), + core.CPUPlace()) + param.value().get_tensor()._set_dims(param_shape) + param._clear() + + # Current rank param_storage + param.fw_storage = core.VarBase( + buffer._slice(start, end), "slice@" + param.name) + param.status = "part" + + # Updata optimizer master weights + if param.dtype == Type.fp16.value: + self._optim._master_weights[param.fw_storage.name] = paddle.cast( + param.fw_storage, Type.fp32.value) + + def _register_forward_hooks(self, layer): + current_layer_params = 
_current_layer_params(layer) + if current_layer_params: + self._register_forward_all_hooks(layer, self._task_flow) + + for _, sub_layer in layer.named_children(): + self._register_forward_hooks(sub_layer) + + def _register_forward_all_hooks(self, sub_layer, task_flow): + def _forward_pre_hook(layer, inputs): + return ForwardPreHooks(layer, self._order_tracer, + self._trainable_params, self._param2buffer, + self._rank, self._group, self._sync_comm, + task_flow) + + def _forward_post_hook(layer, inputs, outputs): + return ForwardPostHooks.apply( + outputs, layer, self._order_tracer, self._trainable_params, + self._param2buffer, self._param2buffer_size, self._rank, + self._group, self._sync_comm, task_flow) + + # register previous forward hooks + sub_layer.register_forward_pre_hook(_forward_pre_hook) + + # register post forward hooks + sub_layer.register_forward_post_hook(_forward_post_hook) + + @paddle.no_grad() + def _sync_buffers(self): + for buffer in self._layer.buffers(include_sublayers=True): + dist.broadcast( + buffer, + self._global_root_rank, + self._group, + use_calc_stream=True) + # Multi stream operation will be supported later + dist.wait(tensor=buffer, group=self._group, use_calc_stream=True) + + def __getattr__(self, name): + """Forward missing attributes to wrapped layer.""" + try: + return super().__getattr__(name) + except AttributeError: + return getattr(self._layer, name) + + def _update_params(self): + update_list = [] + assert len(self._trainable_params.keys()) > 0 + current_layer_params = self._layer.parameters(include_sublayers=True) + trainable_params = list( + filter(lambda x: x.trainable, current_layer_params)) + for param in trainable_params: + assert hasattr( + param, + "fw_storage"), "Find {} don't have fw_storage attribute".format( + param.name) + + if self._accumulate_grads: + param.bw_storage.scale_(scale=self._world_size_scaling) + param.fw_storage = _VarBaseWrapper(param) + param.fw_storage._copy_gradient_from(param.bw_storage) + update_list.append(param) + return update_list + + def get_all_parameters(self): + assert len(self._trainable_params.keys()) > 0 + current_layer_params = self._layer.parameters(include_sublayers=True) + trainable_params = list( + filter(lambda x: x.trainable, current_layer_params)) + for param in trainable_params: + if param.use_count > 0: + continue + assert hasattr( + param, + "fw_storage"), "Find {} don't have fw_storage attribute".format( + param.name) + + full_param = _all_gather( + param.fw_storage, self._group, use_calc_stream=True) + dist.wait( + tensor=full_param, group=self._group, use_calc_stream=True) + core.VarBase(full_param._slice(0, param._numel()))._share_buffer_to( + param) + param.value().get_tensor()._set_dims(param.shape) + param.fw_storage._clear() + param.fw_storage = None + param.status = "all" + param.use_count += 1 + + self._optim._parameter_list = self._ori_parameter_list + self._optim._param_groups = self._ori_param_groups + + def _register_backward_hooks(self): + current_layer_params = self._layer.parameters(include_sublayers=True) + trainable_params = list( + filter(lambda x: x.trainable, current_layer_params)) + + for param in trainable_params: + allreduce_function = self._get_allreduce_fn(param) + param._register_backward_hook(allreduce_function) + + def _get_allreduce_fn(self, param): + @paddle.no_grad() + def reduce(*_): + if param.name in self._task_flow.full_grad.keys(): + full_grad = self._task_flow.full_grad[param.name] + with paddle.amp.auto_cast(enable=False): + if not self._accumulate_grads: 
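+                        # Pre-scale the dense gradient by 1/nranks so the
+                        # summing all_reduce below yields the mean across ranks.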
+ full_grad.scale_(scale=self._world_size_scaling) + # Only support sync allreduce current rank's layer now + dist.all_reduce( + tensor=full_grad, + group=self._group, + use_calc_stream=True) + dist.wait( + tensor=full_grad, + group=self._group, + use_calc_stream=True) + + start, end = self._param2buffer[param.name][self._rank] + if not self._accumulate_grads or param.bw_storage is None: + param.bw_storage = core.VarBase( + full_grad._slice(start, end)).detach().clone() + else: + param.bw_storage.add_( + core.VarBase(full_grad._slice(start, end)).detach() + .clone()) + param.clear_gradient(False) + param._gradient_set_empty(False) + tmp_var = self._task_flow.full_grad.pop(param.name) + tmp_var._clear() + + if param.name in self._task_flow.full_param.keys(): + if param.status == "all": + param.use_count = 0 + param._clear() + start, end = self._param2buffer[param.name][self._rank] + with paddle.amp.auto_cast(enable=False): + param.fw_storage = core.VarBase( + self._task_flow.full_param[param.name]._slice(start, + end), + param.name + "@slice").detach().clone() + param.status = "part" + tmp_var = self._task_flow.full_param.pop(param.name) + tmp_var._clear() + + return reduce + + def _redefine_opt_step(self): + params_slice_func = self._update_params_slice + opt_step = self._optim.step + update_scaler = self._optim.update_scaler + + def _opt_step(self): + if not update_scaler: + params_slice_func() + opt_step() + + self._optim.step = MethodType(_opt_step, self._optim) + + def _redefine_opt_clear(self): + clear_func = self._clear_gradients + + def _opt_clear(self): + clear_func() + + self._optim.clear_grad = MethodType(_opt_clear, self._optim) + + +def ForwardPreHooks(layer, order_tracer, trainable_params, param2buffer, rank, + group, sync_comm, task_flow): + + # Record layer's id + layer_id = id(layer) + use_calc, sync_wait = False, False + + if layer_id not in order_tracer.keys() or sync_comm: + use_calc, sync_wait = True, True + task_flow.use_calc[layer_id] = use_calc + else: + task_flow.use_calc[layer_id] = use_calc + _wait_layer(trainable_params, layer_id, task_flow, group, use_calc) + + if layer_id == order_tracer["layer"][-1]: return + order_ = order_tracer[layer_id] + layer_id = order_tracer["layer"][order_ + 1] + _allgather_buffer( + layer_id, + trainable_params, + group, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait) + return + + +class ForwardPostHooks(PyLayer): + @staticmethod + def forward(ctx, inputs, layer, order_tracer, trainable_params, + param2buffer, param2buffer_size, rank, group, sync_comm, + task_flow): + _release_param(layer, trainable_params, param2buffer, rank, task_flow) + + layer_id = id(layer) + if layer_id not in order_tracer.keys(): + order_ = order_tracer["order"] + order_tracer[layer_id] = order_ + order_tracer["order"] += 1 + order_tracer["layer"].append(layer_id) + ctx.order_tracer = order_tracer + ctx.task_flow = task_flow + ctx.group = group + ctx.layer = layer + ctx.sync_comm = sync_comm + ctx.trainable_params = trainable_params + ctx.param2buffer_size = param2buffer_size + + return inputs + + @staticmethod + def backward(ctx, *args): + # Load context value + order_tracer = ctx.order_tracer + task_flow = ctx.task_flow + group = ctx.group + layer = ctx.layer + trainable_params = ctx.trainable_params + param2buffer_size = ctx.param2buffer_size + sync_comm = ctx.sync_comm + layer_id = id(layer) + use_calc, sync_wait = False, False + if sync_comm: + use_calc, sync_wait = True, True + _allgather_buffer( + layer_id, + trainable_params, 
+ group, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait) + else: + _wait_layer(trainable_params, layer_id, task_flow, group, use_calc) + _create_params_grad(layer, trainable_params, param2buffer_size, + task_flow) + task_flow.use_calc[layer_id] = use_calc + if layer_id != order_tracer["layer"][0] and not sync_comm: + layer_next_id = order_tracer["layer"][order_tracer[layer_id] - 1] + _allgather_buffer( + layer_next_id, + trainable_params, + group, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait) + + return args + + +class TaskFlow: + """ + Task flows, one way linked list for task acquisition. + """ + + def __init__(self, + full_param=dict(), + full_grad=dict(), + use_calc=dict(), + callback=None): + self.full_param = full_param + self.full_grad = full_grad + self.use_calc = use_calc + self.callback = callback + + +def _release_param(layer, trainable_params, param2buffer, rank, task_flow): + for param in trainable_params[id(layer)]: + # async communicate share weight not clear + param.use_count -= 1 + if param.use_count == 0: + param._clear() + if param.name in task_flow.full_param.keys(): + start, end = param2buffer[param.name][rank] + with paddle.amp.auto_cast(enable=False): + param.fw_storage = core.VarBase( + task_flow.full_param[param.name]._slice(start, end), + param.name + "@slice").detach().clone() + param.status = "part" + tmp_var = task_flow.full_param.pop(param.name) + tmp_var._clear() + return + + +def _wait_layer(trainable_params, layer_id, task_flow, group, use_calc_stream): + for param in trainable_params[layer_id]: + if param.status == "all": + param.use_count += 1 + continue + if param.name in task_flow.full_param.keys(): + full_param = task_flow.full_param[param.name] + with paddle.amp.auto_cast(enable=False): + paddle.device.cuda.synchronize() + core.VarBase(full_param._slice(0, param._numel()))._share_buffer_to( + param) + param.value().get_tensor()._set_dims(param.shape) + param.fw_storage._clear() + param.fw_storage = None + param.status = "all" + param.use_count += 1 + else: + _allgather_buffer( + layer_id, + trainable_params, + group, + use_calc_stream, + task_flow, + sync_wait=True) + break + return task_flow + + +def _allgather_buffer(layer_id, + trainable_params, + group, + use_calc_stream, + task_flow, + sync_wait=False): + for param in trainable_params[layer_id]: + if param.status == "all": + param.use_count += 1 + continue + with paddle.amp.auto_cast(enable=False): + full_param = _all_gather( + param.fw_storage, group, use_calc_stream=use_calc_stream) + if sync_wait: + with paddle.amp.auto_cast(enable=False): + dist.wait( + tensor=full_param, + group=group, + use_calc_stream=use_calc_stream) + core.VarBase(full_param._slice(0, param._numel()))._share_buffer_to( + param) + param.value().get_tensor()._set_dims(param.shape) + param.fw_storage._clear() + param.fw_storage = None + param.status = "all" + param.use_count += 1 + task_flow.full_param[param.name] = full_param + return task_flow + + +@paddle.no_grad() +def _create_params_grad(layer, trainable_params, param2buffer_size, task_flow): + for param in trainable_params[id(layer)]: + if param.name in task_flow.full_grad.keys(): + continue + assert isinstance(param2buffer_size[param.name], int) + temp_grad = paddle.zeros( + [param2buffer_size[param.name]], dtype=param.dtype) + param._copy_gradient_from( + core.VarBase(temp_grad._slice(0, param._numel()))) + task_flow.full_grad[param.name] = temp_grad + return task_flow + + +def _PartitionParam(param): + if not 
hasattr(param, "fw_storage"): + setattr(param, "fw_storage", None) + setattr(param, "bw_storage", None) + setattr(param, "status", "all") + setattr(param, "use_count", 0) + return param + + +def _VarBaseWrapper(param): + varbase = param.fw_storage + tmp_param = ParamBase( + shape=varbase.shape, dtype=varbase.dtype, name="slice@" + param.name) + varbase._share_buffer_to(tmp_param) + tmp_param.regularizer = param.regularizer + tmp_param.optimize_attr['learning_rate'] = param.optimize_attr[ + 'learning_rate'] + varbase._clear() + return tmp_param + + +def _OptimizerWrapper(optimizer, offload, group, update_params_slice): + if not hasattr(optimizer, "_optim"): + setattr(optimizer, "_optim", optimizer) + setattr(optimizer, "offload", offload) + setattr(optimizer, "group", group) + setattr(optimizer, "update_scaler", None) + setattr(optimizer, "update_slice", update_params_slice) + return optimizer + + +def _current_layer_params(layer): + return layer.parameters( + include_sublayers=False) + list(layer.extra_parameters) if hasattr( + layer, "extra_parameters") else layer.parameters( + include_sublayers=False) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py index 272aada576be8..5f696195c1abc 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py @@ -152,6 +152,9 @@ def unscale_method(self, optimizer): param_grads = [] param_grads_fp16 = [] param_grads_fp32 = [] + if hasattr(optimizer, "update_slice"): + optimizer.update_slice() + optimizer.update_scaler = True if getattr(optimizer._optim, '_param_groups', None) and isinstance( optimizer._optim._param_groups[0], dict): @@ -161,27 +164,21 @@ def unscale_method(self, optimizer): if param._grad_ivar() is not None: param_grads.append(param._grad_ivar()) if param._grad_ivar( - ).dtype == core.VarDesc.VarType.FP16: + ).dtype in [core.VarDesc.VarType.FP16, paddle.float16]: param_grads_fp16.append(param._grad_ivar()) else: param_grads_fp32.append(param._grad_ivar()) else: - param_grads = [ - param._grad_ivar() for param in optimizer._optim._parameter_list - if param._grad_ivar() is not None - ] - param_grads_fp16 = [ - param._grad_ivar() for param in optimizer._optim._parameter_list - if (param._grad_ivar() is not None - ) and (param._grad_ivar().dtype == core.VarDesc.VarType.FP16 - ) - ] - param_grads_fp32 = [ - param._grad_ivar() for param in optimizer._optim._parameter_list - if (param._grad_ivar() is not None - ) and (param._grad_ivar().dtype == core.VarDesc.VarType.FP32 - ) - ] + for param in optimizer._optim._parameter_list: + if param.grad is not None: + param_grads.append(param.grad) + if param.grad.dtype in [ + core.VarDesc.VarType.FP16, paddle.float16 + ]: + param_grads_fp16.append(param.grad) + else: + param_grads_fp32.append(param.grad) + temp_found_inf_fp16 = to_variable(np.array([0]).astype(np.bool)) temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool)) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 67697fcfd8398..c0c13866ccd55 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -34,6 +34,7 @@ list(APPEND DIST_TEST_OPS test_parallel_dygraph_tensor_parallel) list(APPEND DIST_TEST_OPS test_parallel_dygraph_sharding_parallel) list(APPEND DIST_TEST_OPS 
test_dygraph_sharding_optimizer_stage2) list(APPEND DIST_TEST_OPS test_dygraph_sharding_stage2) +list(APPEND DIST_TEST_OPS test_dygraph_sharding_stage3) list(APPEND DIST_TEST_OPS test_auto_parallel_parallelizer) list(APPEND DIST_TEST_OPS test_parallel_dygraph_mp_layers) list(APPEND DIST_TEST_OPS test_hybrid_parallel_inference_helper) @@ -250,6 +251,7 @@ if ((NOT WITH_GPU) AND (NOT WITH_ROCM)) list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sharding_parallel) list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_optimizer_stage2) list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage2) + list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage3) list(REMOVE_ITEM TEST_OPS test_auto_parallel_parallelizer) list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mp_layers) LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision) @@ -1058,6 +1060,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL) set_tests_properties(test_parallel_dygraph_sharding_parallel PROPERTIES TIMEOUT 120) set_tests_properties(test_dygraph_sharding_optimizer_stage2 PROPERTIES TIMEOUT 120) set_tests_properties(test_dygraph_sharding_stage2 PROPERTIES TIMEOUT 120) + set_tests_properties(test_dygraph_sharding_stage3 PROPERTIES TIMEOUT 120) set_tests_properties(test_auto_parallel_parallelizer PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_mp_layers PROPERTIES TIMEOUT 120) set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py new file mode 100644 index 0000000000000..5b0bec9c454b0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py @@ -0,0 +1,233 @@ +# -*- coding: UTF-8 -*- + +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
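+
+# Trains the same MLP with sharding stage2 and stage3 (fp32/fp16, with and
+# without gradient accumulation, plus a sync_comm/recompute variant) and
+# asserts the resulting parameters match; see test_stage2_stage3() below.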
+ +import numpy as np +import argparse +import ast +import time +import paddle +import paddle.fluid as fluid +from paddle.fluid.dygraph.nn import Linear +from paddle.distributed import fleet +from paddle.fluid.dygraph import nn + +from paddle.distributed.fleet.meta_optimizers.dygraph_optimizer.sharding_optimizer_stage2 import ShardingOptimizerStage2 +from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage2 import ShardingStage2 +from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage3 import ShardingStage3 +from paddle.distributed.fleet.meta_parallel.sharding.sharding_utils import ShardingScaler + +epoch = 10 +batch_size = 32 +paddle.seed(2021) +np.random.seed(2021) +base_lr = 0.1 +momentum_rate = 0.9 +l2_decay = 1e-4 +fleet.init(is_collective=True) + + +class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): + super(MLP, self).__init__() + + self._linear1 = Linear(linear_size, linear_size) + self._linear2 = Linear(linear_size, linear_size) + self._linear3 = Linear(linear_size, 10) + + def forward(self, inputs): + y = self._linear1(inputs) + y = self._linear2(y) + y = self._linear3(y) + return y + + +def reader_decorator(linear_size=1000): + def __reader__(): + for _ in range(100): + img = np.random.rand(linear_size).astype('float32') + label = np.ones(1).astype('int64') + yield img, label + + return __reader__ + + +def optimizer_setting(model, use_pure_fp16, opt_group=False): + clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) + optimizer = paddle.optimizer.AdamW( + parameters=[{ + "params": model.parameters() + }] if opt_group else model.parameters(), + learning_rate=0.001, + weight_decay=0.00001, + grad_clip=clip, + multi_precision=use_pure_fp16) + + return optimizer + + +def train_mlp(model, + sharding_stage, + use_pure_fp16=False, + accumulate_grad=False, + opt_group=False, + recompute=False): + group = paddle.distributed.new_group([0, 1]) + if opt_group: + optimizer = optimizer_setting( + model=model, use_pure_fp16=use_pure_fp16, opt_group=opt_group) + else: + optimizer = optimizer_setting(model=model, use_pure_fp16=use_pure_fp16) + + if use_pure_fp16: + model = paddle.amp.decorate( + models=model, level='O2', save_dtype='float32') + scaler = paddle.amp.GradScaler(init_loss_scaling=32768) + scaler = ShardingScaler(scaler) + if sharding_stage == 2: + optimizer = ShardingOptimizerStage2( + params=model.parameters(), optim=optimizer, group=group) + model = ShardingStage2( + model, + optimizer, + group=group, + buffer_max_size=2**21, + accumulate_grads=accumulate_grad) + elif sharding_stage == 3: + model = ShardingStage3( + model, optimizer=optimizer, group=group, sync_comm=recompute) + + train_reader = paddle.batch( + reader_decorator(), batch_size=batch_size, drop_last=True) + + train_loader = paddle.io.DataLoader.from_generator( + capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) + train_loader.set_sample_list_generator(train_reader) + + for eop in range(epoch): + model.train() + for batch_id, data in enumerate(train_loader()): + img, label = data + label.stop_gradient = True + img.stop_gradient = True + with paddle.amp.auto_cast(True, level='O2'): + out = model(img) + loss = paddle.nn.functional.cross_entropy( + input=out, label=label) + avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) + if not accumulate_grad: + if not use_pure_fp16: + avg_loss.backward() + optimizer.step() + else: + scaler.scale(avg_loss).backward() + scaler.step(optimizer) + 
scaler.update() + optimizer.clear_grad() + if accumulate_grad: + if not use_pure_fp16: + avg_loss.backward() + optimizer.step() + else: + scaler.scale(avg_loss).backward() + scaler.step(optimizer) + scaler.update() + optimizer.clear_grad() + if sharding_stage == 3: + model.get_all_parameters() + return model.parameters() + + +def test_stage2_stage3(): + mlp, mlp1, mlp2, mlp3, mlp4, mlp5, mlp6, mlp7, mlp8 = MLP(), MLP(), MLP( + ), MLP(), MLP(), MLP(), MLP(), MLP(), MLP() + state_dict = mlp.state_dict() + mlp1.set_state_dict(state_dict) + mlp2.set_state_dict(state_dict) + mlp3.set_state_dict(state_dict) + mlp4.set_state_dict(state_dict) + mlp5.set_state_dict(state_dict) + mlp6.set_state_dict(state_dict) + mlp7.set_state_dict(state_dict) + mlp8.set_state_dict(state_dict) + # fp32 + stage2_params = train_mlp( + mlp1, sharding_stage=2, use_pure_fp16=False, opt_group=True) + stage3_params = train_mlp( + mlp2, sharding_stage=3, use_pure_fp16=False, opt_group=True) + for i in range(len(stage2_params)): + for j in range(len(stage3_params)): + if stage2_params[i].name == stage3_params[j].name: + np.testing.assert_allclose( + stage2_params[i].numpy(), + stage3_params[j].numpy(), + rtol=1e-6) + # fp32 accumulate grad + stage2_params = train_mlp( + mlp3, + sharding_stage=2, + use_pure_fp16=False, + accumulate_grad=True, + opt_group=True) + stage3_params = train_mlp( + mlp4, + sharding_stage=3, + use_pure_fp16=False, + accumulate_grad=True, + opt_group=True) + for i in range(len(stage2_params)): + for j in range(len(stage3_params)): + if stage2_params[i].name == stage3_params[j].name: + np.testing.assert_allclose( + stage2_params[i].numpy(), + stage3_params[j].numpy(), + rtol=1e-6) + # fp16 + stage2_params = train_mlp( + mlp5, sharding_stage=2, use_pure_fp16=True, opt_group=False) + stage3_params = train_mlp( + mlp6, sharding_stage=3, use_pure_fp16=True, opt_group=False) + for i in range(len(stage2_params)): + for j in range(len(stage3_params)): + if stage2_params[i].name == stage3_params[j].name: + np.testing.assert_allclose( + stage2_params[i].numpy(), + stage3_params[j].numpy(), + rtol=1e-6) + # fp16 recompute + stage3_params = train_mlp( + mlp7, sharding_stage=3, use_pure_fp16=True, opt_group=False) + stage3_params_re = train_mlp( + mlp8, + sharding_stage=3, + use_pure_fp16=True, + opt_group=False, + recompute=True) + for i in range(len(stage3_params)): + for j in range(len(stage3_params_re)): + if stage3_params[i].name == stage3_params_re[j].name: + np.testing.assert_allclose( + stage3_params[i].numpy(), + stage3_params_re[j].numpy(), + rtol=1e-6) + return + + +if __name__ == '__main__': + test_stage2_stage3() diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py new file mode 100644 index 0000000000000..89d5f2e8c7b29 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py @@ -0,0 +1,31 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import paddle.fluid as fluid + +from test_parallel_dygraph_dataparallel import TestMultipleGpus + + +class TestDygraphShardingStage3(TestMultipleGpus): + + # check sharding logic as well as the accuracy with single mode + def test_dygraph_sharding_optimizer_stage3(self): + self.run_mnist_2gpu('dygraph_sharding_stage3.py') + + +if __name__ == "__main__": + unittest.main() From 0de8a805a89eb70203163a34858ff504afff30df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=98=8E=E5=86=AC?= <78149749+winter-wang@users.noreply.github.com> Date: Fri, 14 Jan 2022 16:05:00 +0800 Subject: [PATCH 02/10] [infrt] update the version of llvm. test=develop (#38843) --- cmake/external/llvm.cmake | 13 +- paddle/infrt/CMakeLists.txt | 1 - paddle/infrt/common/global.h | 2 +- paddle/infrt/dialect/CMakeLists.txt | 6 +- paddle/infrt/dialect/basic_kernels.cc | 22 +-- paddle/infrt/dialect/basic_kernels.h | 5 +- paddle/infrt/dialect/basic_kernels.td | 7 +- paddle/infrt/dialect/dense_tensor.cc | 148 +++++------------- paddle/infrt/dialect/dense_tensor.h | 51 ++++-- paddle/infrt/dialect/diagnostic_utils.cc | 7 +- paddle/infrt/dialect/diagnostic_utils.h | 6 +- paddle/infrt/dialect/dialect.cc | 16 +- paddle/infrt/dialect/infrt_base.cc | 6 +- paddle/infrt/dialect/infrt_base.h | 32 ++-- paddle/infrt/dialect/infrt_base.td | 6 +- paddle/infrt/dialect/init_infrt_dialects.cc | 12 +- paddle/infrt/dialect/init_infrt_dialects.h | 8 +- paddle/infrt/dialect/mlir_loader.cc | 18 ++- paddle/infrt/dialect/mlir_loader.h | 9 +- paddle/infrt/dialect/mlir_loader_test.cc | 11 +- paddle/infrt/dialect/mlir_tests/rewrite.mlir | 2 +- .../dialect/mlir_tests/rewrite_conv_bn.mlir | 2 +- paddle/infrt/dialect/mlir_tests/trt_ops.mlir | 2 +- paddle/infrt/dialect/ops.td | 6 - paddle/infrt/dialect/opt.cc | 26 +-- paddle/infrt/dialect/pd_op_base.td | 2 +- paddle/infrt/dialect/pd_ops.cc | 29 ++-- paddle/infrt/dialect/pd_ops.h | 36 ++--- paddle/infrt/dialect/pd_ops.td | 14 +- paddle/infrt/dialect/pd_types.h | 11 +- paddle/infrt/dialect/print_ir.cc | 45 +++--- paddle/infrt/dialect/tensor_shape.cc | 16 +- paddle/infrt/dialect/tensor_shape.h | 8 +- paddle/infrt/dialect/tensor_shape_base.td | 4 +- paddle/infrt/dialect/tensorrt/trt_exec.cc | 4 +- .../dialect/tensorrt/trt_graph_fuse_pass.cc | 78 +++++---- .../dialect/tensorrt/trt_graph_fuse_pass.h | 12 +- .../dialect/tensorrt/trt_graph_split_pass.cc | 20 +-- .../dialect/tensorrt/trt_graph_split_pass.h | 10 +- .../dialect/tensorrt/trt_op_teller_pass.cc | 25 ++- .../dialect/tensorrt/trt_op_teller_pass.h | 14 +- paddle/infrt/dialect/tensorrt/trt_ops.cc | 22 ++- paddle/infrt/dialect/tensorrt/trt_ops.h | 41 +++-- paddle/infrt/dialect/test_kernels.cc | 75 ++++----- paddle/infrt/dialect/test_kernels.h | 7 +- paddle/infrt/dialect/types.cc | 17 -- paddle/infrt/dialect/types.h | 16 -- paddle/infrt/host_context/core_runtime.cc | 6 +- paddle/infrt/host_context/core_runtime.h | 6 +- paddle/infrt/host_context/kernel_frame.h | 6 +- .../host_context/kernel_registry_test.cc | 6 +- .../infrt/host_context/kernel_utils_test.cc | 6 +- .../host_context/mlir_function_executable.cc | 1 + .../host_context/mlir_function_executable.h | 3 +- .../host_context/mlir_program_executor.h | 4 +- .../host_context/mlir_to_runtime_translate.cc | 90 ++++++----- .../host_context/mlir_to_runtime_translate.h | 8 +- .../mlir_to_runtime_translate_test.cc | 12 +- 
paddle/infrt/host_context/op_executable.cc | 7 +- paddle/infrt/host_context/op_executable.h | 12 +- paddle/infrt/kernel/basic_kernels.cc | 6 +- paddle/infrt/kernel/basic_kernels.h | 12 +- paddle/infrt/kernel/tensor_kernels.cc | 6 +- paddle/infrt/kernel/tensor_kernels.h | 12 +- paddle/infrt/kernel/tensor_shape_kernels.cc | 6 +- paddle/infrt/kernel/tensor_shape_kernels.h | 12 +- paddle/infrt/kernel/test_kernels.cc | 6 +- paddle/infrt/kernel/test_kernels.h | 12 +- paddle/infrt/paddle/cpp/desc_api.h | 8 +- paddle/infrt/paddle/model_parser.cc | 6 +- paddle/infrt/paddle/model_parser.h | 6 +- paddle/infrt/paddle/pb/block_desc.cc | 8 +- paddle/infrt/paddle/pb/block_desc.h | 8 +- paddle/infrt/paddle/pb/op_desc.cc | 8 +- paddle/infrt/paddle/pb/op_desc.h | 8 +- paddle/infrt/paddle/pb/program_desc.cc | 8 +- paddle/infrt/paddle/pb/program_desc.h | 8 +- paddle/infrt/paddle/pb/var_desc.cc | 8 +- paddle/infrt/paddle/pb/var_desc.h | 8 +- 79 files changed, 616 insertions(+), 637 deletions(-) delete mode 100644 paddle/infrt/dialect/ops.td delete mode 100644 paddle/infrt/dialect/types.cc delete mode 100644 paddle/infrt/dialect/types.h diff --git a/cmake/external/llvm.cmake b/cmake/external/llvm.cmake index e080a7359af98..27210e5260048 100644 --- a/cmake/external/llvm.cmake +++ b/cmake/external/llvm.cmake @@ -1,7 +1,7 @@ include(FetchContent) -set(LLVM_DOWNLOAD_URL https://paddle-inference-dist.bj.bcebos.com/CINN/llvm11.tar.gz) -set(LLVM_MD5 39d32b6be466781dddf5869318dcba53) +set(LLVM_DOWNLOAD_URL https://paddle-inference-dist.bj.bcebos.com/infrt/llvm_b5149f4e66a49a98b67e8e2de4e24a4af8e2781b.tar.gz) +set(LLVM_MD5 022819bb5760817013cf4b8a37e97d5e) set(FETCHCONTENT_BASE_DIR ${THIRD_PARTY_PATH}/llvm) set(FETCHCONTENT_QUIET OFF) @@ -51,7 +51,7 @@ message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") # To build with MLIR, the LLVM is build from source code using the following flags: #[==[ -cmake -G Ninja ../llvm \ +cmake ../llvm -G "Unix Makefiles" \ -DLLVM_ENABLE_PROJECTS="mlir;clang" \ -DLLVM_BUILD_EXAMPLES=OFF \ -DLLVM_TARGETS_TO_BUILD="X86" \ @@ -59,8 +59,10 @@ cmake -G Ninja ../llvm \ -DLLVM_ENABLE_ASSERTIONS=ON \ -DLLVM_ENABLE_ZLIB=OFF \ -DLLVM_ENABLE_RTTI=ON \ + -DLLVM_INSTALL_UTILS=ON \ + -DCMAKE_INSTALL_PREFIX=./install #]==] -# The matched llvm-project version is f9dc2b7079350d0fed3bb3775f496b90483c9e42 (currently a temporary commit) +# The matched llvm-project version is b5149f4e66a49a98b67e8e2de4e24a4af8e2781b (currently a temporary commit) add_definitions(${LLVM_DEFINITIONS}) @@ -75,7 +77,7 @@ add_definitions(${LLVM_DEFINITIONS}) # The minimum needed libraries for MLIR IR parse and transform. 
-set(MLIR_IR_LIBS MLIRAnalysis MLIRStandardOps MLIRPass MLIRParser MLIRDialect MLIRIR MLIROptLib) +set(MLIR_IR_LIBS MLIRAnalysis MLIRPass MLIRParser MLIRDialect MLIRIR MLIROptLib) # tb_base is the name of a xxx.td file (without the .td suffix) @@ -89,6 +91,7 @@ function(mlir_tablegen_on td_base) mlir_tablegen(${td_base}.cpp.inc -gen-op-defs) if (mlir_tablegen_on_DIALECT) mlir_tablegen(${td_base}_dialect.hpp.inc --gen-dialect-decls -dialect=${mlir_tablegen_on_DIALECT}) + mlir_tablegen(${td_base}_dialect.cpp.inc --gen-dialect-defs -dialect=${mlir_tablegen_on_DIALECT}) endif() add_public_tablegen_target(${td_base}_IncGen) add_custom_target(${td_base}_inc DEPENDS ${td_base}_IncGen) diff --git a/paddle/infrt/CMakeLists.txt b/paddle/infrt/CMakeLists.txt index 8f05d286bf033..8af3012a220ad 100644 --- a/paddle/infrt/CMakeLists.txt +++ b/paddle/infrt/CMakeLists.txt @@ -77,7 +77,6 @@ add_subdirectory(paddle) # MLIR td file generations set(infrt_mlir_incs - ops_inc basic_kernels_inc test_kernels_inc infrt_base_inc diff --git a/paddle/infrt/common/global.h b/paddle/infrt/common/global.h index f89164d03f31d..e6586cb3a3c60 100644 --- a/paddle/infrt/common/global.h +++ b/paddle/infrt/common/global.h @@ -14,7 +14,7 @@ #pragma once -#include "mlir/IR/MLIRContext.h" +#include #include "paddle/infrt/tensor/dense_host_tensor.h" namespace infrt { diff --git a/paddle/infrt/dialect/CMakeLists.txt b/paddle/infrt/dialect/CMakeLists.txt index d145843684c63..c064b2145266b 100644 --- a/paddle/infrt/dialect/CMakeLists.txt +++ b/paddle/infrt/dialect/CMakeLists.txt @@ -2,7 +2,6 @@ core_gather_headers() gather_srcs(infrt_src SRCS dialect.cc - types.cc basic_kernels.cc test_kernels.cc infrt_base.cc @@ -14,8 +13,6 @@ gather_srcs(infrt_src SRCS pd_types.cc pd_ops.cc ) - -mlir_tablegen_on(ops) mlir_tablegen_on(basic_kernels) mlir_tablegen_on(test_kernels) mlir_tablegen_on(infrt_base DIALECT infrt) @@ -27,8 +24,7 @@ mlir_add_rewriter(rewrite) # TODO(Superjomn) add a cmake function cc_executable to ecapsulate the following code add_executable(infrtopt opt.cc) -target_link_libraries(infrtopt infrt ${mlir_libs}) -add_dependencies(infrtopt infrt) +target_link_libraries(infrtopt infrt) add_executable(print-ir print_ir.cc) target_link_libraries(print-ir infrt ${mlir_libs}) diff --git a/paddle/infrt/dialect/basic_kernels.cc b/paddle/infrt/dialect/basic_kernels.cc index b4d2b9182b0c5..bad7e73ec5ae5 100644 --- a/paddle/infrt/dialect/basic_kernels.cc +++ b/paddle/infrt/dialect/basic_kernels.cc @@ -17,17 +17,17 @@ #include #include #include -#include -#include +#include +#include #include #include -#include #include #include #include "paddle/infrt/dialect/dense_tensor.h" -namespace infrt::dialect { +namespace infrt { +namespace dialect { using namespace mlir; // NOLINT static ParseResult parseCallOp(OpAsmParser &parser, // NOLINT @@ -71,12 +71,12 @@ static ParseResult parseConstantF64Op(OpAsmParser &parser, // NOLINT static ParseResult parseConstantI32Op(OpAsmParser &parser, // NOLINT OperationState &result) { // NOLINT return parseConstantOp( - IntegerType::get(32, result.getContext()), parser, result); + IntegerType::get(result.getContext(), 32), parser, result); } static ParseResult parseConstantI64Op(OpAsmParser &parser, // NOLINT OperationState &result) { // NOLINT return parseConstantOp( - IntegerType::get(64, result.getContext()), parser, result); + IntegerType::get(result.getContext(), 64), parser, result); } static ParseResult parseReturnOp(OpAsmParser &parser, // NOLINT @@ -90,10 +90,10 @@ static ParseResult 
parseReturnOp(OpAsmParser &parser, // NOLINT } static void print(OpAsmPrinter &p, CallOp op) { // NOLINT - p << "infrt.call " << op.getAttr("callee") << "("; + p << "infrt.call " << op->getAttr("callee") << "("; p.printOperands(op.getOperands()); p << ")"; - p.printOptionalAttrDict(op.getAttrs(), {"callee"}); + p.printOptionalAttrDict(op->getAttrs(), {"callee"}); p << " : "; } @@ -145,7 +145,7 @@ static LogicalResult verify(ConstantF64Op op) { return success(); } static LogicalResult verify(ConstantI64Op op) { return success(); } static LogicalResult verify(ReturnOp op) { - auto function = dyn_cast(op.getParentOp()); + auto function = dyn_cast(op->getParentOp()); if (!function) return success(); @@ -157,8 +157,8 @@ static LogicalResult verify(ReturnOp op) { return success(); } +} // namespace dialect +} // namespace infrt #define GET_OP_CLASSES #include "paddle/infrt/dialect/basic_kernels.cpp.inc" - -} // namespace infrt::dialect diff --git a/paddle/infrt/dialect/basic_kernels.h b/paddle/infrt/dialect/basic_kernels.h index 65316bc1437c0..b82abcd52d28f 100644 --- a/paddle/infrt/dialect/basic_kernels.h +++ b/paddle/infrt/dialect/basic_kernels.h @@ -13,12 +13,9 @@ // limitations under the License. #pragma once +#include #include #include -using namespace mlir; // NOLINT - -namespace infrt::dialect { #define GET_OP_CLASSES #include "paddle/infrt/dialect/basic_kernels.hpp.inc" -} // namespace infrt::dialect diff --git a/paddle/infrt/dialect/basic_kernels.td b/paddle/infrt/dialect/basic_kernels.td index df5e4d8a2c6a1..7d8de79fbae2b 100644 --- a/paddle/infrt/dialect/basic_kernels.td +++ b/paddle/infrt/dialect/basic_kernels.td @@ -27,7 +27,7 @@ def CallOp : INFRT_Op<"call"> { let results = (outs Variadic); let extraClassDeclaration = [{ - StringRef getCallee() { return callee(); } + mlir::StringRef getCallee() { return callee(); } mlir::FunctionType getCalleeType(); }]; } @@ -57,9 +57,8 @@ def ReturnOp : INFRT_Op<"return", [Terminator]> { let arguments = (ins Variadic:$operands); - let builders = [OpBuilder< - "OpBuilder &b, OperationState &result", - [{ build(b, result, llvm::None); }]>]; + let builders = [OpBuilder<(ins), + [{ build($_builder, $_state, llvm::None); }]>]; } class AddOp : INFRT_Op<"add." 
# suffix, [NoSideEffect]> { diff --git a/paddle/infrt/dialect/dense_tensor.cc b/paddle/infrt/dialect/dense_tensor.cc index 629a7b16523fc..7685cdc65b9ad 100644 --- a/paddle/infrt/dialect/dense_tensor.cc +++ b/paddle/infrt/dialect/dense_tensor.cc @@ -17,12 +17,11 @@ #include #include #include +#include +#include #include -#include -#include #include #include -#include #include #include @@ -31,68 +30,37 @@ #include "paddle/infrt/common/global.h" #include "paddle/infrt/dialect/tensor_shape.h" -namespace infrt::dt { - +namespace infrt { +namespace dt { void DTDialect::initialize() { - allowUnknownTypes(); addOperations< #define GET_OP_LIST #include "paddle/infrt/dialect/dense_tensor.cpp.inc" >(); } -namespace detail { -struct TensorTypeStorage : public mlir::TypeStorage { - TensorTypeStorage(TargetType target, - LayoutType layout, - PrecisionType precision) - : target_(target), layout_(layout), precision_(precision) {} - - using KeyTy = std::tuple; - - bool operator==(const KeyTy &key) const { - return key == KeyTy(target_, layout_, precision_); - } - - static llvm::hash_code hashKey(const KeyTy &key) { - return llvm::hash_value(key); - } - - static TensorTypeStorage *construct( - mlir::TypeStorageAllocator &allocator, // NOLINT - const KeyTy &key) { - return new (allocator.allocate()) - TensorTypeStorage(std::get<0>(key), std::get<1>(key), std::get<2>(key)); - } - - TargetType target_; - LayoutType layout_; - PrecisionType precision_; -}; -} // namespace detail - llvm::Optional GetTargetType(mlir::StringRef key) { - if (key.equals_lower("x86")) + if (key.equals_insensitive("x86")) return TargetType::X86; - else if (key.equals_lower("cuda")) + else if (key.equals_insensitive("cuda")) return TargetType::CUDA; else return llvm::None; } llvm::Optional GetLayoutType(mlir::StringRef key) { - if (key.equals_lower("nchw")) + if (key.equals_insensitive("nchw")) return LayoutType::NCHW; - else if (key.equals_lower("nhwc")) + else if (key.equals_insensitive("nhwc")) return LayoutType::NHWC; else return llvm::None; } llvm::Optional GetPrecisionType(mlir::StringRef key) { - if (key.equals_lower("i32")) + if (key.equals_insensitive("i32")) return PrecisionType::I32; - else if (key.equals_lower("f32")) + else if (key.equals_insensitive("f32")) return PrecisionType::F32; else return llvm::None; @@ -111,7 +79,7 @@ LayoutType TensorType::layout() { return getImpl()->layout_; } PrecisionType TensorType::precision() { return getImpl()->precision_; } -raw_ostream &operator<<(raw_ostream &os, TensorType tensorType) { +mlir::raw_ostream &operator<<(mlir::raw_ostream &os, TensorType tensorType) { os << "TensorType<" << tensorType.target() << ", " << tensorType.layout() << ", " << tensorType.precision() << ">"; return os; @@ -133,7 +101,7 @@ StringType StringType::get(mlir::MLIRContext *context) { return Base::get(context); } -raw_ostream &operator<<(raw_ostream &os, TargetType type) { +mlir::raw_ostream &operator<<(mlir::raw_ostream &os, TargetType type) { switch (type) { case (TargetType::X86): os << "X86"; @@ -147,7 +115,7 @@ raw_ostream &operator<<(raw_ostream &os, TargetType type) { return os; } -raw_ostream &operator<<(raw_ostream &os, LayoutType type) { +mlir::raw_ostream &operator<<(mlir::raw_ostream &os, LayoutType type) { switch (type) { case (LayoutType::NCHW): os << "NCHW"; @@ -161,7 +129,7 @@ raw_ostream &operator<<(raw_ostream &os, LayoutType type) { return os; } -raw_ostream &operator<<(raw_ostream &os, PrecisionType type) { +mlir::raw_ostream &operator<<(mlir::raw_ostream &os, PrecisionType type) { 
switch (type) { case (PrecisionType::I32): os << "I32"; @@ -175,103 +143,69 @@ raw_ostream &operator<<(raw_ostream &os, PrecisionType type) { return os; } -static Type getTensorType(mlir::MLIRContext *context) { - auto t_dialect = Identifier::get("t", context); - return OpaqueType::get(t_dialect, "tensor", context); +static mlir::Type getTensorType(mlir::MLIRContext *context) { + auto t_dialect = mlir::Identifier::get("t", context); + return mlir::OpaqueType::get(t_dialect, "tensor"); } -static ParseResult parseCreateUninitTensorOp( - OpAsmParser &parser, // NOLINT - OperationState &result) { // NOLINT +static mlir::ParseResult parseCreateUninitTensorOp( + mlir::OpAsmParser &parser, // NOLINT + mlir::OperationState &result) { // NOLINT auto loc = parser.getCurrentLocation(); - ::mlir::Type outputRawTypes[1]; - ::llvm::ArrayRef<::mlir::Type> outputTypes(outputRawTypes); + mlir::Type outputRawTypes[1]; + ::llvm::ArrayRef outputTypes(outputRawTypes); mlir::ArrayAttr shapeAttr; if (parser.parseAttribute(shapeAttr, parser.getBuilder().getI64Type(), "shape", result.attributes)) - return failure(); - if (parser.parseOptionalAttrDict(result.attributes)) return failure(); + return mlir::failure(); + if (parser.parseOptionalAttrDict(result.attributes)) return mlir::failure(); - if (parser.parseArrow()) return failure(); - if (parser.parseType(outputRawTypes[0])) return failure(); + if (parser.parseArrow()) return mlir::failure(); + if (parser.parseType(outputRawTypes[0])) return mlir::failure(); if (!outputRawTypes[0].isa()) return parser.emitError(loc, "invalid kind of type specified"); result.addTypes(outputTypes); - return success(); + return mlir::success(); } template -static void printCreateUninitTensorOp(OpAsmPrinter &p, // NOLINT +static void printCreateUninitTensorOp(mlir::OpAsmPrinter &p, // NOLINT CreateUninitTensorOp op) { p << CreateUninitTensorOp::getOperationName(); p << " "; p.printAttributeWithoutType(op.shapeAttr()); - p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{"shape"}); + p.printOptionalAttrDict(op->getAttrs(), /*elidedAttrs=*/{"shape"}); p << " -> "; p << op.getOperation()->getResultTypes(); } -// TODO(shibo): can be removed? -// static ParseResult parseFillTensorWithConstantOp(OpAsmParser& parser, -// OperationState& result) { -// auto loc = parser.getCurrentLocation(); -// ::mlir::OpAsmParser::OperandType inputRawOperands[1]; -// ::llvm::ArrayRef<::mlir::OpAsmParser::OperandType> -// inputOperands(inputRawOperands); -// ::mlir::Type inputRawTypes[1]; -// ::llvm::ArrayRef<::mlir::Type> inputTypes(inputRawTypes); -// -// if (parser.parseOperand(inputRawOperands[0])) return failure(); -// -// if (parser.parseColon()) return failure(); -// if (parser.parseType(inputRawTypes[0])) return failure(); -// if (!inputRawTypes[0].isa()) -// return parser.emitError(loc, "invalid kind of type specified"); -// -// Attribute value_attr; -// if (parser.resolveOperands(inputOperands, inputTypes, loc, result.operands)) -// return failure(); -// if (parser.parseAttribute(value_attr, "value", result.attributes)) return -// failure(); -// return success(); -//} - -// TODO(shibo): can be removed? 
-// template -// static void printFillTensorWithConstantOp(OpAsmPrinter& p, FillTensorOp op) { -// p << FillTensorOp::getOperationName(); -// p << " "; -// p.printOperand(op.getOperand()); -// p << " : "; -// p << op.getOperation()->getOperandTypes(); -// p << " "; -// p << op.getAttr("value"); -//} - -static ParseResult parseSetTensorOp(OpAsmParser &parser, // NOLINT - OperationState &result) { // NOLINT - SmallVector operands; - if (parser.parseOperandList(operands, 1)) return failure(); +static mlir::ParseResult parseSetTensorOp( + mlir::OpAsmParser &parser, // NOLINT + mlir::OperationState &result) { // NOLINT + llvm::SmallVector operands; + if (parser.parseOperandList(operands, 1)) return mlir::failure(); auto tensor_type = getTensorType(result.getContext()); - Attribute value_attr; - return failure( + mlir::Attribute value_attr; + return mlir::failure( parser.resolveOperand(operands[0], tensor_type, result.operands) || parser.parseAttribute(value_attr, "values", result.attributes)); } template -static void printSetTensorOp(OpAsmPrinter &p, SetTensorOp op) { // NOLINT +static void printSetTensorOp(mlir::OpAsmPrinter &p, SetTensorOp op) { // NOLINT p << SetTensorOp::getOperationName() << " "; p.printOperand(op.getOperand()); - p << " " << op.getAttr("values"); + p << " " << op->getAttr("values"); } +} // namespace dt +} // namespace infrt #define GET_OP_CLASSES #include "paddle/infrt/dialect/dense_tensor.cpp.inc" // NOLINT -} // namespace infrt::dt +#include "paddle/infrt/dialect/dense_tensor_dialect.cpp.inc" diff --git a/paddle/infrt/dialect/dense_tensor.h b/paddle/infrt/dialect/dense_tensor.h index 866c62213ab05..416925d3382ba 100644 --- a/paddle/infrt/dialect/dense_tensor.h +++ b/paddle/infrt/dialect/dense_tensor.h @@ -19,13 +19,8 @@ #include -using namespace mlir; // NOLINT -namespace infrt::dt { - -namespace detail { -struct TensorTypeStorage; -} // namespace detail - +namespace infrt { +namespace dt { enum class TargetType : uint8_t { X86, CUDA }; enum class LayoutType : uint8_t { NCHW, NHWC }; enum class PrecisionType : uint8_t { I32, F32 }; @@ -34,9 +29,39 @@ llvm::Optional GetTargetType(mlir::StringRef key); llvm::Optional GetLayoutType(mlir::StringRef key); llvm::Optional GetPrecisionType(mlir::StringRef key); -raw_ostream &operator<<(raw_ostream &os, TargetType type); -raw_ostream &operator<<(raw_ostream &os, LayoutType type); -raw_ostream &operator<<(raw_ostream &os, PrecisionType type); +mlir::raw_ostream &operator<<(mlir::raw_ostream &os, TargetType type); +mlir::raw_ostream &operator<<(mlir::raw_ostream &os, LayoutType type); +mlir::raw_ostream &operator<<(mlir::raw_ostream &os, PrecisionType type); + +namespace detail { +struct TensorTypeStorage : public mlir::TypeStorage { + TensorTypeStorage(TargetType target, + LayoutType layout, + PrecisionType precision) + : target_(target), layout_(layout), precision_(precision) {} + + using KeyTy = std::tuple; + + bool operator==(const KeyTy &key) const { + return key == KeyTy(target_, layout_, precision_); + } + + static llvm::hash_code hashKey(const KeyTy &key) { + return llvm::hash_value(key); + } + + static TensorTypeStorage *construct( + mlir::TypeStorageAllocator &allocator, // NOLINT + const KeyTy &key) { + return new (allocator.allocate()) + TensorTypeStorage(std::get<0>(key), std::get<1>(key), std::get<2>(key)); + } + + TargetType target_; + LayoutType layout_; + PrecisionType precision_; +}; +} // namespace detail class TensorType : public mlir::Type::TypeBase #include -namespace infrt::dialect { +namespace infrt { 
+namespace dialect { struct MyScopedDiagnosicHandler::Impl { Impl() : diag_stream_(diag_str_) {} @@ -49,4 +51,5 @@ mlir::LogicalResult MyScopedDiagnosicHandler::handler(mlir::Diagnostic *diag) { return mlir::failure(true); } -} // namespace infrt::dialect +} // namespace dialect +} // namespace infrt diff --git a/paddle/infrt/dialect/diagnostic_utils.h b/paddle/infrt/dialect/diagnostic_utils.h index 3a8098cf75181..746e61c8fe5c3 100644 --- a/paddle/infrt/dialect/diagnostic_utils.h +++ b/paddle/infrt/dialect/diagnostic_utils.h @@ -18,7 +18,8 @@ #include -namespace infrt::dialect { +namespace infrt { +namespace dialect { /** * A scoped diagnostic handler to help debug MLIR process. @@ -36,4 +37,5 @@ class MyScopedDiagnosicHandler : public mlir::SourceMgrDiagnosticHandler { std::unique_ptr impl_; }; -} // namespace infrt::dialect +} // namespace dialect +} // namespace infrt diff --git a/paddle/infrt/dialect/dialect.cc b/paddle/infrt/dialect/dialect.cc index cbcd5d0f0fa78..fe07b91d22ed5 100644 --- a/paddle/infrt/dialect/dialect.cc +++ b/paddle/infrt/dialect/dialect.cc @@ -13,24 +13,26 @@ // limitations under the License. #include +#include #include -#include #include #include -#include #include #include -namespace infrt::hlir::dialect { +namespace infrt { +namespace hlir { +namespace dialect { -class CinnDialect : public ::mlir::Dialect { +class CinnDialect : public mlir::Dialect { public: - explicit CinnDialect(::mlir::MLIRContext* ctx); + explicit CinnDialect(mlir::MLIRContext* ctx); //! We should register this function in dialect static llvm::StringRef getDialectNamespace() { return "infrt::hlir::dialect"; } }; - -} // namespace infrt::hlir::dialect +} // namespace dialect +} // namespace hlir +} // namespace infrt diff --git a/paddle/infrt/dialect/infrt_base.cc b/paddle/infrt/dialect/infrt_base.cc index b28ad5ad4b5a5..e8005661bbd65 100644 --- a/paddle/infrt/dialect/infrt_base.cc +++ b/paddle/infrt/dialect/infrt_base.cc @@ -18,7 +18,8 @@ #include "paddle/infrt/dialect/dense_tensor.h" #include "paddle/infrt/dialect/test_kernels.h" -namespace infrt::dialect { +namespace infrt { +namespace dialect { // ----INFRTDialect definition begin---- void INFRTDialect::initialize() { @@ -124,4 +125,5 @@ void INFRTDialect::printType(mlir::Type type, // ----INFRTDialect definition end---- -} // namespace infrt::dialect +} // namespace dialect +} // namespace infrt diff --git a/paddle/infrt/dialect/infrt_base.h b/paddle/infrt/dialect/infrt_base.h index 58acd7c9a409a..1a7fbcf395a6e 100644 --- a/paddle/infrt/dialect/infrt_base.h +++ b/paddle/infrt/dialect/infrt_base.h @@ -18,19 +18,17 @@ #include #include #include -#include #include #include #include "paddle/infrt/dialect/infrt_base.hpp.inc" -namespace infrt::dialect { - -class INFRTDialect : public ::mlir::Dialect { - explicit INFRTDialect(::mlir::MLIRContext *context) - : ::mlir::Dialect(getDialectNamespace(), - context, - ::mlir::TypeID::get()) { +namespace infrt { +namespace dialect { +class INFRTDialect : public mlir::Dialect { + explicit INFRTDialect(mlir::MLIRContext *context) + : mlir::Dialect( + getDialectNamespace(), context, mlir::TypeID::get()) { initialize(); } @@ -41,15 +39,12 @@ class INFRTDialect : public ::mlir::Dialect { mlir::DialectAsmPrinter &printer) const override; void initialize(); - friend class ::mlir::MLIRContext; + friend class mlir::MLIRContext; public: static ::llvm::StringRef getDialectNamespace() { return "infrt"; } }; - -} // namespace infrt::dialect - -namespace mlir { +} // namespace dialect template static 
mlir::IntegerAttr createI32Attr(mlir::OpBuilder &b, // NOLINT @@ -58,17 +53,16 @@ static mlir::IntegerAttr createI32Attr(mlir::OpBuilder &b, // NOLINT return b.getIntegerAttr(b.getI32Type(), constant); } -static mlir::SmallVector<::mlir::Value, 4> cvtValueToValueRange( +static mlir::SmallVector cvtValueToValueRange( const mlir::Value &operand) { - return mlir::SmallVector<::mlir::Value, 4>(1, operand); + return mlir::SmallVector(1, operand); } -static mlir::SmallVector<::mlir::Value, 4> concatTwoValueRange( +static mlir::SmallVector concatTwoValueRange( mlir::ValueRange operand_0, mlir::ValueRange operand_1) { - mlir::SmallVector<::mlir::Value, 4> operands; + mlir::SmallVector operands; operands.append(operand_0.begin(), operand_0.end()); operands.append(operand_1.begin(), operand_1.end()); return operands; } - -} // namespace mlir +} // namespace infrt diff --git a/paddle/infrt/dialect/infrt_base.td b/paddle/infrt/dialect/infrt_base.td index 7d6fdbbbf2f68..1abd294236d93 100644 --- a/paddle/infrt/dialect/infrt_base.td +++ b/paddle/infrt/dialect/infrt_base.td @@ -28,11 +28,11 @@ def TensorMapType : def BufferType : OpaqueType<"b", "buffer", "buffer">; class INFRT_createI32Attr : NativeCodeCall< - "mlir::createI32Attr($_builder, $_loc, " # value # ")">; + "infrt::createI32Attr($_builder, $_loc, " # value # ")">; def INFRT_cvtValueToValueRange : NativeCodeCall< - "mlir::cvtValueToValueRange($0)">; + "infrt::cvtValueToValueRange($0)">; def INFRT_concatTwoValueRange : NativeCodeCall< - "mlir::concatTwoValueRange($0, $1)">; + "infrt::concatTwoValueRange($0, $1)">; #endif // INFRT_BASE diff --git a/paddle/infrt/dialect/init_infrt_dialects.cc b/paddle/infrt/dialect/init_infrt_dialects.cc index 4bc2bf70942d2..c3769414dbb39 100644 --- a/paddle/infrt/dialect/init_infrt_dialects.cc +++ b/paddle/infrt/dialect/init_infrt_dialects.cc @@ -23,12 +23,10 @@ #include "paddle/infrt/dialect/tensor_shape.h" namespace infrt { - -void RegisterCinnDialects(mlir::DialectRegistry& registry) { // NOLINT - registry.insert(); - registry.insert(); - registry.insert(); - registry.insert(); +void registerCinnDialects(mlir::DialectRegistry ®istry) { // NOLINT + registry.insert(); } - } // namespace infrt diff --git a/paddle/infrt/dialect/init_infrt_dialects.h b/paddle/infrt/dialect/init_infrt_dialects.h index 50caca018980d..0912e9ef2555b 100644 --- a/paddle/infrt/dialect/init_infrt_dialects.h +++ b/paddle/infrt/dialect/init_infrt_dialects.h @@ -14,10 +14,8 @@ #pragma once -#include "mlir/IR/Dialect.h" - +#include +#include namespace infrt { - -void RegisterCinnDialects(mlir::DialectRegistry& registry); // NOLINT - +void registerCinnDialects(mlir::DialectRegistry ®istry); // NOLINT } // namespace infrt diff --git a/paddle/infrt/dialect/mlir_loader.cc b/paddle/infrt/dialect/mlir_loader.cc index b318a6a763483..1d0696e77dcda 100644 --- a/paddle/infrt/dialect/mlir_loader.cc +++ b/paddle/infrt/dialect/mlir_loader.cc @@ -16,8 +16,8 @@ #include #include +#include #include -#include #include #include #include @@ -30,12 +30,15 @@ #include "paddle/infrt/dialect/diagnostic_utils.h" #include "paddle/infrt/dialect/init_infrt_dialects.h" -namespace infrt::dialect { +namespace infrt { +namespace dialect { mlir::OwningModuleRef LoadMlirSource(mlir::MLIRContext* context, const std::string& mlir_source) { // context->allowUnregisteredDialects(); - RegisterCinnDialects(context->getDialectRegistry()); + mlir::DialectRegistry registry; + registerCinnDialects(registry); + context->appendDialectRegistry(registry); // Currenetly, We only used the 
CinnDialect and mlir::BuiltinDialect is // enough。Don't need StandardOpsDialect. // context->getDialectRegistry().insert(); @@ -57,9 +60,9 @@ mlir::OwningModuleRef LoadMlirSource(mlir::MLIRContext* context, mlir::OwningModuleRef LoadMlirFile(const std::string& file_name, mlir::MLIRContext* context) { // context->allowUnregisteredDialects(); - RegisterCinnDialects(context->getDialectRegistry()); - context->getDialectRegistry().insert(); - + mlir::DialectRegistry registry; + registerCinnDialects(registry); + context->appendDialectRegistry(registry); mlir::ScopedDiagnosticHandler scope_handler( context, [](mlir::Diagnostic& diag) { if (diag.getSeverity() != mlir::DiagnosticSeverity::Error) @@ -71,4 +74,5 @@ mlir::OwningModuleRef LoadMlirFile(const std::string& file_name, return mlir::parseSourceFile(std::string(file_name), context); } -} // namespace infrt::dialect +} // namespace dialect +} // namespace infrt diff --git a/paddle/infrt/dialect/mlir_loader.h b/paddle/infrt/dialect/mlir_loader.h index 092da7d9ce03f..5e50ad9e5a271 100644 --- a/paddle/infrt/dialect/mlir_loader.h +++ b/paddle/infrt/dialect/mlir_loader.h @@ -15,16 +15,17 @@ #pragma once #include -#include +#include #include #include -namespace infrt::dialect { +namespace infrt { +namespace dialect { mlir::OwningModuleRef LoadMlirSource(mlir::MLIRContext* context, const std::string& mlir_source); mlir::OwningModuleRef LoadMlirFile(const std::string& file_name, mlir::MLIRContext* context); - -} // namespace infrt::dialect +} // namespace dialect +} // namespace infrt diff --git a/paddle/infrt/dialect/mlir_loader_test.cc b/paddle/infrt/dialect/mlir_loader_test.cc index 1b622d585ad8e..1115053073044 100644 --- a/paddle/infrt/dialect/mlir_loader_test.cc +++ b/paddle/infrt/dialect/mlir_loader_test.cc @@ -17,14 +17,15 @@ #include #include #include -#include +#include #include #include #include "paddle/infrt/dialect/init_infrt_dialects.h" -namespace infrt::dialect { +namespace infrt { +namespace dialect { TEST(MlirLoader, basic) { mlir::MLIRContext context; @@ -42,8 +43,7 @@ func @main() -> f32 { )ROC"; auto module = LoadMlirSource(&context, source); - module->verify(); - + EXPECT_TRUE(mlir::succeeded(module->verify())); LOG(INFO) << "module name: " << module->getOperationName().data(); for (auto func : module->getOps()) { LOG(INFO) << "get func " << func.getName().str(); @@ -54,4 +54,5 @@ func @main() -> f32 { } } -} // namespace infrt::dialect +} // namespace dialect +} // namespace infrt diff --git a/paddle/infrt/dialect/mlir_tests/rewrite.mlir b/paddle/infrt/dialect/mlir_tests/rewrite.mlir index bfad9d1f6924d..5e207634da8e4 100644 --- a/paddle/infrt/dialect/mlir_tests/rewrite.mlir +++ b/paddle/infrt/dialect/mlir_tests/rewrite.mlir @@ -20,5 +20,5 @@ func @main() -> tensor { %c2 = "pd.matmul"(%e1, %b2) {transpose_x=true, transpose_y=false} : (tensor, tensor) -> tensor %d2 = "pd.elementwise_add"(%c2, %bias2) {axis=1:i32} : (tensor, tensor) -> tensor %e2 = "pd.relu"(%d2) {} : (tensor) -> tensor - infrt.return %e2 : tensor + "pd.fetch"(%e2) {name="output"} :(tensor)->() } \ No newline at end of file diff --git a/paddle/infrt/dialect/mlir_tests/rewrite_conv_bn.mlir b/paddle/infrt/dialect/mlir_tests/rewrite_conv_bn.mlir index 9ea1ec0ebca36..2889b92b18ef0 100644 --- a/paddle/infrt/dialect/mlir_tests/rewrite_conv_bn.mlir +++ b/paddle/infrt/dialect/mlir_tests/rewrite_conv_bn.mlir @@ -11,5 +11,5 @@ func @main() -> tensor { %c = "pd.conv2d"(%a, %filter, %bias) {} : (tensor, tensor<3x64x3x3xf32>, tensor<64xf32>) -> tensor %d = "pd.batch_norm"(%c, 
%scale, %bias2, %mean, %var) {} : (tensor, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor - infrt.return %d : tensor + "pd.fetch"(%d) {name="output"} :(tensor)->() } \ No newline at end of file diff --git a/paddle/infrt/dialect/mlir_tests/trt_ops.mlir b/paddle/infrt/dialect/mlir_tests/trt_ops.mlir index 009b6d1c19653..d98f107bab41e 100644 --- a/paddle/infrt/dialect/mlir_tests/trt_ops.mlir +++ b/paddle/infrt/dialect/mlir_tests/trt_ops.mlir @@ -18,5 +18,5 @@ func @main() -> tensor { %d2 = "pd.elementwise_add"(%c2, %bias2) {axis=1:i32} : (tensor, tensor) -> tensor %e2 = "pd.relu"(%d2) {} : (tensor) -> tensor - "pd.fetch"(%e2) :(tensor)->() + "pd.fetch"(%e2) {name="output"} :(tensor)->() } diff --git a/paddle/infrt/dialect/ops.td b/paddle/infrt/dialect/ops.td deleted file mode 100644 index 264134a447c63..0000000000000 --- a/paddle/infrt/dialect/ops.td +++ /dev/null @@ -1,6 +0,0 @@ -include "mlir/IR/OpBase.td" -include "paddle/infrt/dialect/infrt_base.td" - - -class INFRT_Op traits = []> : - Op; diff --git a/paddle/infrt/dialect/opt.cc b/paddle/infrt/dialect/opt.cc index d90d25230d0c2..5bcf5a23f4c53 100644 --- a/paddle/infrt/dialect/opt.cc +++ b/paddle/infrt/dialect/opt.cc @@ -12,34 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include - -#include - -#include "paddle/infrt/common/global.h" #include "paddle/infrt/dialect/init_infrt_dialects.h" -#include "paddle/infrt/dialect/mlir_loader.h" int main(int argc, char **argv) { - mlir::MLIRContext *context = infrt::Global::getMLIRContext(); - - auto ®istry = context->getDialectRegistry(); - infrt::RegisterCinnDialects(registry); - + mlir::DialectRegistry registry; + infrt::registerCinnDialects(registry); mlir::registerCanonicalizerPass(); - return mlir::failed( - mlir::MlirOptMain(argc, argv, "INFRT mlir pass driver", registry)); + mlir::MlirOptMain(argc, argv, "infrt mlir pass driver", registry)); } diff --git a/paddle/infrt/dialect/pd_op_base.td b/paddle/infrt/dialect/pd_op_base.td index af53df113dfb3..a3e3c4ae59277 100644 --- a/paddle/infrt/dialect/pd_op_base.td +++ b/paddle/infrt/dialect/pd_op_base.td @@ -16,7 +16,7 @@ def PD_Dialect : Dialect { This dialect contains the PaddlePaddle operators. 
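// Sketch of the registration flow the rewritten opt.cc above relies on:
// dialects go into a local mlir::DialectRegistry that is handed to the tool
// driver, instead of being registered on a global MLIRContext. Header paths
// match MLIR releases of this vintage and are an assumption here.
#include <mlir/IR/Dialect.h>
#include <mlir/Support/LogicalResult.h>
#include <mlir/Support/MlirOptMain.h>
#include <mlir/Transforms/Passes.h>
#include "paddle/infrt/dialect/init_infrt_dialects.h"

int main(int argc, char **argv) {
  mlir::DialectRegistry registry;
  infrt::registerCinnDialects(registry);  // registry.insert<...>() inside
  mlir::registerCanonicalizerPass();
  // MlirOptMain returns an mlir::LogicalResult; main still wraps it in
  // mlir::failed() to turn it into a process exit code.
  return mlir::failed(
      mlir::MlirOptMain(argc, argv, "infrt mlir pass driver", registry));
}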
}]; - let cppNamespace = "::mlir::pd"; + let cppNamespace = "mlir::pd"; } class PD_Op traits = []> : diff --git a/paddle/infrt/dialect/pd_ops.cc b/paddle/infrt/dialect/pd_ops.cc index ce10be6d100f8..fe38996883846 100644 --- a/paddle/infrt/dialect/pd_ops.cc +++ b/paddle/infrt/dialect/pd_ops.cc @@ -14,10 +14,15 @@ #include "paddle/infrt/dialect/pd_ops.h" -#include "mlir/IR/Matchers.h" -#include "mlir/IR/PatternMatch.h" +#include +#include #include "paddle/infrt/dialect/infrt_base.h" +#define GET_OP_CLASSES +#include "paddle/infrt/dialect/pd_ops.cpp.inc" // NOLINT + +#include "paddle/infrt/dialect/rewrite.hpp.inc" // NOLINT + namespace mlir { namespace pd { PaddleDialect::PaddleDialect(MLIRContext *context) @@ -36,12 +41,6 @@ mlir::Operation *PaddleDialect::materializeConstant(mlir::OpBuilder &builder, return builder.create(loc, value); } -#define GET_OP_CLASSES -#include "paddle/infrt/dialect/pd_ops.cpp.inc" // NOLINT -#undef GET_OP_CLASSES - -#include "paddle/infrt/dialect/rewrite.hpp.inc" // NOLINT - void ConstantOp::build(OpBuilder &builder, OperationState &state, Attribute value) { @@ -66,8 +65,8 @@ LogicalResult ConstantOp::inferReturnTypes( inferredReturnTypes.push_back(attributes.get("value").getType()); return success(); } -::mlir::OpFoldResult ConstantOp::fold( - ::llvm::ArrayRef<::mlir::Attribute> operands) { +mlir::OpFoldResult ConstantOp::fold( + ::llvm::ArrayRef operands) { return value(); } @@ -82,11 +81,11 @@ LogicalResult ElementwiseAdd::inferReturnTypes( return success(); } void ElementwiseAdd::getCanonicalizationPatterns( - ::mlir::OwningRewritePatternList &results, ::mlir::MLIRContext *context) { + mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) { results.insert(context); } -::mlir::OpFoldResult ElementwiseAdd::fold( +mlir::OpFoldResult ElementwiseAdd::fold( llvm::ArrayRef operands) { if (getElementTypeOrSelf(getType()).isa()) { if (!operands[0] || !operands[1]) return {}; @@ -154,17 +153,17 @@ LogicalResult MulOp::inferReturnTypes( } void ReluOp::getCanonicalizationPatterns( - ::mlir::OwningRewritePatternList &results, ::mlir::MLIRContext *context) { + mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) { results.insert(context); } void FusedRepeatedFCRelu::getCanonicalizationPatterns( - ::mlir::OwningRewritePatternList &results, ::mlir::MLIRContext *context) { + mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) { results.insert(context); } void BatchNormOp::getCanonicalizationPatterns( - ::mlir::OwningRewritePatternList &results, ::mlir::MLIRContext *context) { + mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) { results.insert(context); } diff --git a/paddle/infrt/dialect/pd_ops.h b/paddle/infrt/dialect/pd_ops.h index 71e0a53988d1a..7d1d1d6f58451 100644 --- a/paddle/infrt/dialect/pd_ops.h +++ b/paddle/infrt/dialect/pd_ops.h @@ -14,21 +14,20 @@ #pragma once -#include "mlir/Dialect/Traits.h" -#include "mlir/IR/Attributes.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/Dialect.h" -#include "mlir/IR/Function.h" -#include "mlir/IR/Matchers.h" -#include "mlir/IR/Module.h" -#include "mlir/IR/OpImplementation.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/IR/TypeUtilities.h" -#include "mlir/Interfaces/CallInterfaces.h" -#include "mlir/Interfaces/DerivedAttributeOpInterface.h" -#include "mlir/Interfaces/InferTypeOpInterface.h" -#include "mlir/Interfaces/LoopLikeInterface.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include namespace mlir { namespace pd { @@ -53,9 +52,8 @@ class PaddleDialect : public Dialect { } }; -#define GET_OP_CLASSES -#include "paddle/infrt/dialect/pd_ops.hpp.inc" -#undef GET_OP_CLASSES - } // namespace pd } // namespace mlir + +#define GET_OP_CLASSES +#include "paddle/infrt/dialect/pd_ops.hpp.inc" diff --git a/paddle/infrt/dialect/pd_ops.td b/paddle/infrt/dialect/pd_ops.td index b020b7ad5dbc7..3addf15082a12 100644 --- a/paddle/infrt/dialect/pd_ops.td +++ b/paddle/infrt/dialect/pd_ops.td @@ -24,6 +24,16 @@ def PD_FeedOp : PD_Op<"feed"> { def PD_FetchOp : PD_Op<"fetch", [Terminator]> { let summary = "fetch Op"; + let description = [{ + Return the output tensor from the subgraph. + }]; + + let arguments = (ins PD_Tensor :$inputs, StrAttr:$name); +} + +def PD_ReturnOp : PD_Op<"return", [Terminator]> { + let summary = "return Op"; + let description = [{ Fetch tensor from the graph. }]; @@ -31,7 +41,7 @@ def PD_FetchOp : PD_Op<"fetch", [Terminator]> { let arguments = (ins Variadic:$inputs); } -def PD_GraphOp : PD_Op<"graph", [SingleBlockImplicitTerminator<"FetchOp">]> { +def PD_GraphOp : PD_Op<"graph", [SingleBlockImplicitTerminator<"ReturnOp">]> { let summary = "paddle graph Op"; let description = [{ Describe a paddle graph or subgraph. @@ -50,7 +60,7 @@ def PD_ConstantOp : PD_Op<"constant", [NoSideEffect, ConstantLike, DeclareOpInte let hasFolder = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &state, Attribute value">, + OpBuilder<(ins "Attribute":$value)>, ]; } diff --git a/paddle/infrt/dialect/pd_types.h b/paddle/infrt/dialect/pd_types.h index 6f9fe56338a9f..0da888a9c0769 100644 --- a/paddle/infrt/dialect/pd_types.h +++ b/paddle/infrt/dialect/pd_types.h @@ -18,12 +18,11 @@ #pragma once -#include "mlir/IR/Diagnostics.h" -#include "mlir/IR/Location.h" -#include "mlir/IR/Operation.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/IR/TypeUtilities.h" -#include "mlir/IR/Types.h" +#include +#include +#include +#include +#include namespace mlir { namespace PD { diff --git a/paddle/infrt/dialect/print_ir.cc b/paddle/infrt/dialect/print_ir.cc index 43a3577b90f10..5cfd16ee85943 100644 --- a/paddle/infrt/dialect/print_ir.cc +++ b/paddle/infrt/dialect/print_ir.cc @@ -11,26 +11,25 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
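// Sketch of how the getCanonicalizationPatterns hooks defined in pd_ops.cc
// above are exercised: the DRR rewrites from rewrite.hpp.inc are collected
// into a pattern set and run by the greedy driver
// (mlir/Transforms/GreedyPatternRewriteDriver.h). Type and pass names follow
// MLIR of this era (OwningRewritePatternList, mlir::FuncOp) and are
// assumptions, not code from this patch.
static void applyPdCanonicalizations(mlir::FuncOp func) {
  mlir::MLIRContext *ctx = func.getContext();
  mlir::OwningRewritePatternList patterns(ctx);
  mlir::pd::ElementwiseAdd::getCanonicalizationPatterns(patterns, ctx);
  mlir::pd::ReluOp::getCanonicalizationPatterns(patterns, ctx);
  mlir::pd::BatchNormOp::getCanonicalizationPatterns(patterns, ctx);
  // Applies the patterns repeatedly, folding as it goes, until a fixed point.
  (void)mlir::applyPatternsAndFoldGreedily(func, std::move(patterns));
}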
- +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include "llvm/ADT/Optional.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/raw_os_ostream.h" -#include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/AsmState.h" -#include "mlir/IR/Block.h" -#include "mlir/IR/MLIRContext.h" -#include "mlir/IR/Module.h" -#include "mlir/IR/Operation.h" -#include "mlir/IR/Region.h" -#include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Support/LogicalResult.h" -#include "mlir/Transforms/Passes.h" #include "paddle/infrt/common/global.h" #include "paddle/infrt/dialect/init_infrt_dialects.h" @@ -114,17 +113,15 @@ int main(int argc, char **argv) { mlir::registerPassManagerCLOptions(); cl::ParseCommandLineOptions(argc, argv, "mlir demo"); - mlir::MLIRContext *context = infrt::Global::getMLIRContext(); - // context->allowUnregisteredDialects(); - auto ®istry = context->getDialectRegistry(); - infrt::RegisterCinnDialects(registry); - + mlir::DialectRegistry registry; + infrt::registerCinnDialects(registry); + mlir::MLIRContext context(registry); // mlir will verify module automatically after parsing. // https://github.com/llvm/llvm-project/blob/38d18d93534d290d045bbbfa86337e70f1139dc2/mlir/lib/Parser/Parser.cpp#L2051 // mlir::OwningModuleRef module_ref = mlir::parseSourceString(mlir_source, // context); mlir::OwningModuleRef module_ref = - mlir::parseSourceFile(inputFilename, context); + mlir::parseSourceFile(inputFilename, &context); std::cout << "----------print IR Structure begin----------" << std::endl; printOperation(module_ref->getOperation(), 0); std::cout << "----------print IR Structure end----------" << std::endl; diff --git a/paddle/infrt/dialect/tensor_shape.cc b/paddle/infrt/dialect/tensor_shape.cc index ef5a5525cb22f..92c03818264ee 100644 --- a/paddle/infrt/dialect/tensor_shape.cc +++ b/paddle/infrt/dialect/tensor_shape.cc @@ -17,16 +17,16 @@ #include #include #include +#include +#include #include -#include -#include #include #include -#include #include #include -namespace infrt::ts { +namespace infrt { +namespace ts { using namespace mlir; // NOLINT void TensorShapeDialect::initialize() { @@ -48,8 +48,8 @@ Type TensorShapeDialect::parseType(DialectAsmParser &parser) const { return Type(); } -void TensorShapeDialect::printType(::mlir::Type type, - ::mlir::DialectAsmPrinter &os) const { +void TensorShapeDialect::printType(mlir::Type type, + mlir::DialectAsmPrinter &os) const { if (type.isa()) { os << "shape"; return; @@ -61,8 +61,10 @@ void TensorShapeDialect::printType(::mlir::Type type, } llvm_unreachable("unexpected 'shape' type kind"); } +} // namespace ts +} // namespace infrt #define GET_OP_CLASSES #include "paddle/infrt/dialect/tensor_shape.cpp.inc" // NOLINT -} // namespace infrt::ts +#include "paddle/infrt/dialect/tensor_shape_dialect.cpp.inc" diff --git a/paddle/infrt/dialect/tensor_shape.h b/paddle/infrt/dialect/tensor_shape.h index bd3fa8853675a..af892af735d2a 100644 --- a/paddle/infrt/dialect/tensor_shape.h +++ b/paddle/infrt/dialect/tensor_shape.h @@ -17,7 +17,8 @@ #include #include -namespace infrt::ts { +namespace infrt { +namespace ts { class ShapeType : public mlir::Type::TypeBase { @@ -31,10 +32,9 @@ class PartialShapeType : public mlir::Type::TypeBase()">, "!ts.shape type">, 
BuildableType<"$_builder.getType<::infrt::ts::ShapeType>()"> { - let typeDescription = [{ + let description = [{ `!ts.shape type` represents a static tensor shape. }]; } @@ -27,7 +27,7 @@ BuildableType<"$_builder.getType<::infrt::ts::ShapeType>()"> { def TS_PartialShape : DialectType()">, "!ts.partial_shape type">, BuildableType<"$_builder.getType<::infrt::ts::PartialShapeType>()"> { - let typeDescription = [{ + let description = [{ `!ts.partial_shape type` represents either a static tensor shape, unranked tensor shape or a ranked tensor shape with unknown dimension sizes. }]; diff --git a/paddle/infrt/dialect/tensorrt/trt_exec.cc b/paddle/infrt/dialect/tensorrt/trt_exec.cc index dc0f2acb2b733..1baef7a3f77fd 100644 --- a/paddle/infrt/dialect/tensorrt/trt_exec.cc +++ b/paddle/infrt/dialect/tensorrt/trt_exec.cc @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include #include -#include "llvm/Support/CommandLine.h" -#include "mlir/Pass/PassManager.h" #include "paddle/infrt/common/global.h" #include "paddle/infrt/dialect/mlir_loader.h" #include "paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h" diff --git a/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc b/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc index 181f462962aee..1da80ef2c3b10 100644 --- a/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc +++ b/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc @@ -14,14 +14,13 @@ #include "paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h" +#include +#include +#include +#include #include #include #include -#include "llvm/ADT/SetVector.h" -#include "mlir/Analysis/SliceAnalysis.h" -#include "mlir/IR/Builders.h" -#include "paddle/infrt/dialect/pd_ops.h" -#include "paddle/infrt/dialect/tensorrt/trt_ops.h" namespace infrt { namespace trt { @@ -32,9 +31,9 @@ namespace { // Reference the function nameed "FlexibleDFS" but defined in: // paddle/fluid/framework/ir/subgraph_detector.cc. -bool reverseDfs(std::vector<::mlir::Operation *> source, - const std::function &func) { - std::unordered_set visited; +bool reverseDfs(std::vector source, + const std::function &func) { + std::unordered_set visited; while (!source.empty()) { auto node = source.back(); source.pop_back(); @@ -44,7 +43,7 @@ bool reverseDfs(std::vector<::mlir::Operation *> source, auto values = node->getOperands(); for (auto value : values) { // if the value is a block argument, the node is nullptr. - ::mlir::Operation *node = value.getDefiningOp(); + mlir::Operation *node = value.getDefiningOp(); if (node != nullptr && !visited.count(node)) { source.emplace_back(node); } @@ -54,19 +53,19 @@ bool reverseDfs(std::vector<::mlir::Operation *> source, } // merge the first&second graph op to a new graph op. 
-void mergeTwoAdjacentGraphOp(::mlir::OpBuilder &builder, // NOLINT - ::mlir::pd::GraphOp first, - ::mlir::pd::GraphOp second) { +void mergeTwoAdjacentGraphOp(mlir::OpBuilder &builder, // NOLINT + mlir::pd::GraphOp first, + mlir::pd::GraphOp second) { // comput inputs and outputs - ::llvm::SmallVector<::mlir::Value, 4> inputs(first.getOperands()), outputs; - for (::mlir::Value input : second.getOperands()) { + ::llvm::SmallVector inputs(first.getOperands()), outputs; + for (mlir::Value input : second.getOperands()) { if (input.getDefiningOp() != first) { inputs.push_back(input); } } - ::llvm::DenseMap<::mlir::Value, unsigned int> op_output_mapping; - for (::mlir::Value output : first.getResults()) { - for (::mlir::Operation *user : output.getUsers()) { + ::llvm::DenseMap op_output_mapping; + for (mlir::Value output : first.getResults()) { + for (mlir::Operation *user : output.getUsers()) { if (user != second && user->getParentOp() != second) { op_output_mapping[output] = outputs.size(); outputs.push_back(output); @@ -74,19 +73,19 @@ void mergeTwoAdjacentGraphOp(::mlir::OpBuilder &builder, // NOLINT } } } - auto fetch_op = second.getBody()->getTerminator(); - outputs.append(fetch_op->getOperands().begin(), - fetch_op->getOperands().end()); - ::llvm::SmallVector<::mlir::Type, 4> fetch_types; + auto return_op = second.getBody()->getTerminator(); + outputs.append(return_op->getOperands().begin(), + return_op->getOperands().end()); + ::llvm::SmallVector return_types; for (auto value : outputs) { - fetch_types.push_back(value.getType()); + return_types.push_back(value.getType()); } // create the new graph op builder.setInsertionPoint(first); auto loc = first.getLoc(); - auto graph_op = builder.create<::mlir::pd::GraphOp>(loc, fetch_types, inputs); - ::mlir::Block *block = new ::mlir::Block; + auto graph_op = builder.create(loc, return_types, inputs); + mlir::Block *block = new mlir::Block; auto copy_range = second.getBody()->without_terminator(); block->getOperations().splice(block->begin(), second.getBody()->getOperations(), @@ -98,18 +97,18 @@ void mergeTwoAdjacentGraphOp(::mlir::OpBuilder &builder, // NOLINT copy_range.begin(), copy_range.end()); builder.setInsertionPointToEnd(block); - builder.create(loc, outputs); + builder.create(loc, outputs); graph_op.body().push_back(block); // mapping the output unsigned int num_result = first.getNumResults(); - fetch_op = first.getBody()->getTerminator(); + return_op = first.getBody()->getTerminator(); for (unsigned int index = 0; index < num_result; ++index) { auto origin_value = first.getResult(index); if (op_output_mapping.find(origin_value) == op_output_mapping.end()) { - origin_value.replaceAllUsesWith(fetch_op->getOperand(index)); + origin_value.replaceAllUsesWith(return_op->getOperand(index)); } else { - auto inner_value = fetch_op->getOperand(index); + auto inner_value = return_op->getOperand(index); auto outer_value = graph_op.getResult(op_output_mapping[origin_value]); while (!origin_value.use_empty()) { auto replace_value = @@ -128,13 +127,13 @@ void mergeTwoAdjacentGraphOp(::mlir::OpBuilder &builder, // NOLINT // Topological sort the function op. 
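// Block::getOperations().splice moves ops between blocks without cloning,
// which is what keeps the merge above cheap: both graph bodies are spliced
// (minus their pd.return terminators) into one fresh block, and a single new
// pd.return is built for the combined outputs. The same primitive can inline
// a graph body into its parent block, as the split pass further below does; a
// minimal helper in that style (illustrative, not from the patch; assumes
// graph_op has one non-empty body region):
static void inlineGraphBody(mlir::Block &parent, mlir::Operation *graph_op) {
  mlir::Block *body = &graph_op->getRegion(0).front();
  mlir::Operation *ret = body->getTerminator();
  graph_op->replaceAllUsesWith(ret->getOperands());
  auto range = body->without_terminator();
  parent.getOperations().splice(mlir::Block::iterator(graph_op),
                                body->getOperations(),
                                range.begin(), range.end());
  graph_op->erase();
}
// Splicing can leave a user ahead of its producer in the parent block, which
// is why the topoSortBlock below re-sorts ops before the terminator.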
void topoSortBlock(mlir::Block &body) { // NOLINT - llvm::SetVector toSort; + llvm::SetVector toSort; if (body.empty()) return; for (auto it = body.rbegin(); it != body.rend(); ++it) { toSort.insert(&*it); } - llvm::SetVector result = - ::mlir::topologicalSort(std::move(toSort)); + llvm::SetVector result = + mlir::topologicalSort(std::move(toSort)); for (auto *op : result) { op->moveBefore(body.getTerminator()); } @@ -145,21 +144,21 @@ void topoSortBlock(mlir::Block &body) { // NOLINT // Implementation of the trtGraphFusePass. void trtGraphFusePass::runOnFunction() { mlir::Block &body = getFunction().front(); - ::mlir::OpBuilder builder(&body, body.begin()); + mlir::OpBuilder builder(&body, body.begin()); bool changed = false; do { changed = false; for (auto &op : body) { - ::mlir::pd::GraphOp graph_op = - ::llvm::dyn_cast_or_null<::mlir::pd::GraphOp>(&op); + mlir::pd::GraphOp graph_op = + ::llvm::dyn_cast_or_null(&op); if (nullptr == graph_op) continue; for (auto user_op : op.getUsers()) { - ::mlir::pd::GraphOp user_graph_op = - ::llvm::dyn_cast_or_null<::mlir::pd::GraphOp>(user_op); + mlir::pd::GraphOp user_graph_op = + ::llvm::dyn_cast_or_null(user_op); if (nullptr == user_graph_op) continue; // get all dst input nodes except src. - std::vector<::mlir::Operation *> source_nodes; + std::vector source_nodes; for (auto operand : user_op->getOperands()) { auto input = operand.getDefiningOp(); if (input != &op && input != nullptr) { @@ -167,9 +166,8 @@ void trtGraphFusePass::runOnFunction() { } } // Reverse DFS from the source_nodes. - if (!reverseDfs(source_nodes, [&op](const ::mlir::Operation *n) { - return n == &op; - })) { + if (!reverseDfs(source_nodes, + [&op](const mlir::Operation *n) { return n == &op; })) { mergeTwoAdjacentGraphOp(builder, graph_op, user_graph_op); changed = true; break; diff --git a/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h b/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h index e7134e88f316c..f1e555c6f67ec 100644 --- a/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h +++ b/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "mlir/Pass/Pass.h" +#include namespace infrt { namespace trt { @@ -28,15 +28,15 @@ namespace trt { * %a = "pd.feed"()... * %c = "pd.graph"(%a) { * %m = "pd.conv2d"(%a)... - * "pd.fetch" %m + * "pd.return" %m * } ... * %d = "pd.graph"(%c) { * %m = "pd.conv3d"(%c)... - * "pd.fetch" %m + * "pd.return" %m * } ... * %f = "pd.graph"(%a) { * %m = "pd.conv2d"(%a)... - * "pd.fetch" %m + * "pd.return" %m * } ... * "pd.fetch" %d, %f * @@ -47,13 +47,13 @@ namespace trt { * %m = "pd.conv2d"(%a)... * %n = "pd.conv3d"(%m)... * %s = "pd.conv2d"(%a)... - * "pd.fetch" %n, %s + * "pd.return" %n, %s * } ... 
* "pd.fetch" %d, %f * } */ class trtGraphFusePass - : public ::mlir::PassWrapper { + : public mlir::PassWrapper { public: ::llvm::StringRef getName() const override { return "trtGraphFusePass"; } void runOnFunction() override; diff --git a/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc b/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc index 2b45364de2036..257f2b5285425 100644 --- a/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc +++ b/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc @@ -14,7 +14,7 @@ #include "paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h" -#include "mlir/IR/Builders.h" +#include #include "paddle/infrt/dialect/pd_ops.h" #include "paddle/infrt/dialect/tensorrt/trt_ops.h" @@ -22,24 +22,24 @@ namespace infrt { namespace trt { // Implementation of the trtGraphSplitPass。 void trtGraphSplitPass::runOnFunction() { - std::vector<::mlir::pd::GraphOp> worklist; - ::mlir::Block& block = getFunction().front(); + std::vector worklist; + mlir::Block& block = getFunction().front(); for (auto& op : block) { - ::mlir::pd::GraphOp graph_op = - ::llvm::dyn_cast_or_null<::mlir::pd::GraphOp>(&op); + mlir::pd::GraphOp graph_op = + ::llvm::dyn_cast_or_null(&op); if (nullptr != graph_op && graph_op.getBody()->getOperations().size() <= min_subgraph_size_) { worklist.push_back(graph_op); } } while (!worklist.empty()) { - ::mlir::pd::GraphOp graph_op = worklist.back(); + mlir::pd::GraphOp graph_op = worklist.back(); worklist.pop_back(); - ::mlir::Block* body = graph_op.getBody(); - auto fetch_op = body->getTerminator(); - graph_op.replaceAllUsesWith(fetch_op->getOperands()); + mlir::Block* body = graph_op.getBody(); + auto return_op = body->getTerminator(); + graph_op.replaceAllUsesWith(return_op->getOperands()); auto copy_range = body->without_terminator(); - block.getOperations().splice(::mlir::Block::iterator(graph_op), + block.getOperations().splice(mlir::Block::iterator(graph_op), body->getOperations(), copy_range.begin(), copy_range.end()); diff --git a/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h b/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h index 092df0cf834e5..d30d186647fc3 100644 --- a/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h +++ b/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "mlir/Pass/Pass.h" +#include namespace infrt { namespace trt { @@ -31,9 +31,9 @@ namespace trt { * %m = "pd.conv2d"(%a)... * %n = "pd.conv3d"(%m)... * %s = "pd.conv2d"(%a)... - * "pd.fetch" %n, %s + * "pd.return" (%n, %s) * } ... - * "pd.fetch" %d, %f + * "pd.fetch" (%d, %f) * } * * destination func: @@ -42,11 +42,11 @@ namespace trt { * %c = "pd.conv2d"(%a) ... * %d = "pd.conv3d"(%c) ... * %f = "pd.conv2d"(%a) ... 
- * "pd.fetch" %d, %f + * "pd.fetch" (%d, %f) * } */ class trtGraphSplitPass - : public ::mlir::PassWrapper { + : public mlir::PassWrapper { public: ::llvm::StringRef getName() const override { return "trtGraphSplitPass"; } void runOnFunction() override; diff --git a/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc b/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc index 7b7fbb05c1d13..4e8d40b982b2e 100644 --- a/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc +++ b/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc @@ -14,49 +14,48 @@ #include "paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h" -#include "mlir/IR/Builders.h" +#include #include "paddle/infrt/dialect/pd_ops.h" -#include "paddle/infrt/dialect/tensorrt/trt_ops.h" namespace infrt { namespace trt { // Implementation of the trtOpTellerPass。 void trtOpTellerPass::runOnFunction() { - ::mlir::Block &body = getFunction().front(); - std::vector<::mlir::Operation *> worklist; + mlir::Block &body = getFunction().front(); + std::vector worklist; worklist.reserve(body.getOperations().size()); for (auto &op : body) { worklist.push_back(&op); } // Build GraphOp. - ::mlir::OpBuilder builder(&body, body.begin()); + mlir::OpBuilder builder(&body, body.begin()); while (!worklist.empty()) { auto *op = worklist.back(); worklist.pop_back(); if (op == nullptr) continue; - auto op1 = ::llvm::dyn_cast_or_null<::mlir::pd::FeedOp>(op); + auto op1 = ::llvm::dyn_cast_or_null(op); if (op1) continue; - auto op2 = ::llvm::dyn_cast_or_null<::mlir::pd::FetchOp>(op); + auto op2 = ::llvm::dyn_cast_or_null(op); if (op2) continue; - auto op3 = ::llvm::dyn_cast_or_null<::mlir::pd::GraphOp>(op); + auto op3 = ::llvm::dyn_cast_or_null(op); if (op3) continue; builder.setInsertionPoint(op); auto loc = getFunction().getLoc(); - auto graph_op = builder.create<::mlir::pd::GraphOp>( + auto graph_op = builder.create( loc, op->getResultTypes(), op->getOperands()); - ::llvm::SmallVector<::mlir::Value, 4> tblgen_repl_values; + ::llvm::SmallVector tblgen_repl_values; for (auto v : - ::llvm::SmallVector<::mlir::Value, 4>{graph_op.getODSResults(0)}) { + ::llvm::SmallVector{graph_op.getODSResults(0)}) { tblgen_repl_values.push_back(v); } op->replaceAllUsesWith(tblgen_repl_values); // Build graph op. - ::mlir::Block *block = new ::mlir::Block; + mlir::Block *block = new mlir::Block; graph_op.body().push_back(block); op->moveBefore(block, block->begin()); builder.setInsertionPointToEnd(block); - builder.create(loc, op->getResults()); + builder.create(loc, op->getResults()); } } } // namespace trt diff --git a/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h b/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h index b03945b3459c0..fb16c974f7fb3 100644 --- a/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h +++ b/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "mlir/Pass/Pass.h" +#include namespace infrt { namespace trt { @@ -29,7 +29,7 @@ namespace trt { * %c = "pd.conv2d"(%a) ... * %d = "pd.conv3d"(%c) ... * %f = "pd.conv2d"(%a) ... - * "pd.fetch" %d, %f + * "pd.fetch" (%d, %f) * } * * destination func: @@ -37,23 +37,23 @@ namespace trt { * %a = "pd.feed"()... * %c = "pd.graph"(%a) { * %m = "pd.conv2d"(%a)... - * "pd.fetch" %m + * "pd.return" (%m) * } ... * %d = "pd.graph"(%c) { * %m = "pd.conv3d"(%c)... - * "pd.fetch" %m + * "pd.return" (%m) * } ... * %f = "pd.graph"(%a) { * %m = "pd.conv2d"(%a)... - * "pd.fetch" %m + * "pd.return" (%m) * } ... 
- * "pd.fetch" %d, %f + * "pd.fetch" (%d, %f) * } * TODO(winter-wang): Supplementary how to judge the operators can be supported * by tensorrt. */ class trtOpTellerPass - : public ::mlir::PassWrapper { + : public mlir::PassWrapper { public: ::llvm::StringRef getName() const override { return "trtOpTellerPass"; } void runOnFunction() override; diff --git a/paddle/infrt/dialect/tensorrt/trt_ops.cc b/paddle/infrt/dialect/tensorrt/trt_ops.cc index 4c02238b10e1d..35b7967892caf 100644 --- a/paddle/infrt/dialect/tensorrt/trt_ops.cc +++ b/paddle/infrt/dialect/tensorrt/trt_ops.cc @@ -13,27 +13,25 @@ // limitations under the License. #include "paddle/infrt/dialect/tensorrt/trt_ops.h" -#include "mlir/IR/Matchers.h" -#include "mlir/IR/OpImplementation.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Interfaces/CallInterfaces.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" +#include +#include +#include +#include +#include namespace infrt { namespace trt { -TensorRTDialect::TensorRTDialect(::mlir::MLIRContext *context) - : ::mlir::Dialect("trt", context, ::mlir::TypeID::get()) { +TensorRTDialect::TensorRTDialect(mlir::MLIRContext *context) + : mlir::Dialect("trt", context, mlir::TypeID::get()) { addOperations< #define GET_OP_LIST #include "paddle/infrt/dialect/tensorrt/trt_ops.cpp.inc" // NOLINT >(); -#undef GET_OP_LIST } -#define GET_OP_CLASSES -#include "paddle/infrt/dialect/tensorrt/trt_ops.cpp.inc" // NOLINT -#undef GET_OP_CLASSES - } // namespace trt } // namespace infrt + +#define GET_OP_CLASSES +#include "paddle/infrt/dialect/tensorrt/trt_ops.cpp.inc" // NOLINT diff --git a/paddle/infrt/dialect/tensorrt/trt_ops.h b/paddle/infrt/dialect/tensorrt/trt_ops.h index c9043c2280de0..a37491ec1abc7 100644 --- a/paddle/infrt/dialect/tensorrt/trt_ops.h +++ b/paddle/infrt/dialect/tensorrt/trt_ops.h @@ -14,37 +14,32 @@ #pragma once -#include "mlir/Dialect/Traits.h" -#include "mlir/IR/Attributes.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/Dialect.h" -#include "mlir/IR/Function.h" -#include "mlir/IR/Matchers.h" -#include "mlir/IR/Module.h" -#include "mlir/IR/OpImplementation.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/IR/TypeUtilities.h" -#include "mlir/Interfaces/CallInterfaces.h" -#include "mlir/Interfaces/DerivedAttributeOpInterface.h" -#include "mlir/Interfaces/InferTypeOpInterface.h" -#include "mlir/Interfaces/LoopLikeInterface.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace infrt { namespace trt { -class TensorRTDialect : public ::mlir::Dialect { +class TensorRTDialect : public mlir::Dialect { public: - explicit TensorRTDialect(::mlir::MLIRContext* context); + explicit TensorRTDialect(mlir::MLIRContext* context); static llvm::StringRef getDialectNamespace() { return "trt"; } }; -// mlir bug。 can be removed safety when update mlir to llvm11. 
-using namespace mlir; // NOLINT +} // namespace trt +} // namespace infrt #define GET_OP_CLASSES #include "paddle/infrt/dialect/tensorrt/trt_ops.hpp.inc" -#undef GET_OP_CLASSES - -} // namespace trt -} // namespace infrt diff --git a/paddle/infrt/dialect/test_kernels.cc b/paddle/infrt/dialect/test_kernels.cc index 894d96f95ad5c..c4588d7cf8bab 100644 --- a/paddle/infrt/dialect/test_kernels.cc +++ b/paddle/infrt/dialect/test_kernels.cc @@ -14,14 +14,13 @@ #include "paddle/infrt/dialect/test_kernels.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/OpDefinition.h" -#include "mlir/IR/OpImplementation.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/IR/TypeUtilities.h" - -namespace infrt::dialect { +#include +#include +#include +#include +namespace infrt { +namespace dialect { //===----------------------------------------------------------------------===// // BenchmarkOp //===----------------------------------------------------------------------===// @@ -32,65 +31,67 @@ namespace infrt::dialect { // ... // } -static ParseResult parseBenchmarkOp(OpAsmParser &parser, // NOLINT - OperationState &result) { // NOLINT - StringAttr nameAttr; +static mlir::ParseResult parseBenchmarkOp( + mlir::OpAsmParser &parser, // NOLINT + mlir::OperationState &result) { // NOLINT + mlir::StringAttr nameAttr; if (parser.parseAttribute(nameAttr, "name", result.attributes)) - return failure(); + return mlir::failure(); // Parse the operands, e.g. (%c : i32, %d : f32) - if (parser.parseLParen()) return failure(); + if (parser.parseLParen()) return mlir::failure(); - SmallVector operands; - SmallVector types; + llvm::SmallVector operands; + llvm::SmallVector types; llvm::SMLoc type_loc = parser.getCurrentLocation(); if (parser.parseOptionalRParen()) { // Parse non-empty operands do { // Parse %c : i32, - OpAsmParser::OperandType operand; - Type type; + mlir::OpAsmParser::OperandType operand; + mlir::Type type; if (parser.parseOperand(operand) || parser.parseColonType(type)) - return failure(); + return mlir::failure(); operands.push_back(operand); types.push_back(type); } while (succeeded(parser.parseOptionalComma())); - if (parser.parseRParen()) return failure(); + if (parser.parseRParen()) return mlir::failure(); } if (parser.resolveOperands(operands, types, type_loc, result.operands)) - return failure(); + return mlir::failure(); // Parse the keyword attribute, e.g. 
max_count = 100, duration_secs = 1 do { - StringRef attr; - Attribute resultAttr; + mlir::StringRef attr; + mlir::Attribute resultAttr; if (parser.parseKeyword(&attr) || parser.parseEqual() || parser.parseAttribute(resultAttr, parser.getBuilder().getIntegerType(32), attr, result.attributes)) - return failure(); - } while (succeeded(parser.parseOptionalComma())); + return mlir::failure(); + } while (mlir::succeeded(parser.parseOptionalComma())); // Set the default attribute num_warmup_runs to 1 if unset auto setDefaultAttrIfUnset = [&](const char *attr_name, int value) { bool found = llvm::any_of(result.attributes, - [attr_name](const NamedAttribute &attr) { - return attr.first == attr_name; + [attr_name](const mlir::NamedAttribute &attr) { + return attr.getName() == attr_name; }); if (!found) { - IntegerAttr default_val = parser.getBuilder().getI32IntegerAttr(value); + mlir::IntegerAttr default_val = + parser.getBuilder().getI32IntegerAttr(value); result.addAttribute(attr_name, default_val); } }; setDefaultAttrIfUnset("num_warmup_runs", 1); - Region *target = result.addRegion(); + mlir::Region *target = result.addRegion(); return parser.parseRegion(*target, operands, types, @@ -102,11 +103,11 @@ static ParseResult parseBenchmarkOp(OpAsmParser &parser, // NOLINT // max_count = 100, duration_secs = 1 { // ... // } -static void print(OpAsmPrinter &p, BenchmarkOp op) { // NOLINT +static void print(mlir::OpAsmPrinter &p, BenchmarkOp op) { // NOLINT p << "infrt.benchmark "; // Print the name attribute, e.g "add.i32" - auto name_attr = op.getAttr("name"); + auto name_attr = op->getAttr("name"); p << name_attr; // Print the operands and types, e.g. (%c : i32, %d : f32) @@ -120,13 +121,13 @@ static void print(OpAsmPrinter &p, BenchmarkOp op) { // NOLINT bool need_comma = false; // Print the attributes, e.g. max_count = 100, duration_secs = 1 - for (auto &name_attr : op.getAttrs()) { - auto id = name_attr.first; + for (auto &name_attr : op->getAttrs()) { + auto id = name_attr.getName(); if (id == "name") continue; if (need_comma) p << ", "; - auto attr = name_attr.second; + auto attr = name_attr.getValue(); p << id << " = "; - if (auto int_attr = attr.dyn_cast()) { + if (auto int_attr = attr.dyn_cast()) { int_attr.getValue().print(p.getStream(), /*isSigned=*/false); } else { op.emitOpError("Unexpected attribute"); @@ -142,7 +143,7 @@ static void print(OpAsmPrinter &p, BenchmarkOp op) { // NOLINT p.printRegion(op.region(), /*printEntryBlockArgs=*/false); } -static LogicalResult verify(BenchmarkOp op) { +static mlir::LogicalResult verify(BenchmarkOp op) { // Verify that the target benchmark region has exactly one return value. auto ®ion = op.region(); auto &last_op = region.front().back(); @@ -154,10 +155,10 @@ static LogicalResult verify(BenchmarkOp op) { "incorrect number of return values. One return value is expected"); } - return success(); + return mlir::success(); } +} // namespace dialect +} // namespace infrt #define GET_OP_CLASSES #include "paddle/infrt/dialect/test_kernels.cpp.inc" - -} // namespace infrt::dialect diff --git a/paddle/infrt/dialect/test_kernels.h b/paddle/infrt/dialect/test_kernels.h index 29d4209cb7280..73c8a6fb387bc 100644 --- a/paddle/infrt/dialect/test_kernels.h +++ b/paddle/infrt/dialect/test_kernels.h @@ -13,11 +13,8 @@ // limitations under the License. 
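// Driver for the attr.first/attr.second edits above: newer MLIR releases turn
// NamedAttribute from a std::pair into a class with accessors. An attribute
// walk in the new style, shaped like the benchmark printer above
// (illustrative helper, not from the patch):
static void dumpAttrs(mlir::Operation *op, llvm::raw_ostream &os) {
  for (const mlir::NamedAttribute &name_attr : op->getAttrs()) {
    auto id = name_attr.getName();                // was name_attr.first
    mlir::Attribute attr = name_attr.getValue();  // was name_attr.second
    if (id == "name") continue;  // the printer above skips this one too
    os << id << " = " << attr << "\n";
  }
}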
#pragma once -#include "mlir/IR/OpDefinition.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" +#include +#include -namespace infrt::dialect { -using namespace mlir; // NOLINT #define GET_OP_CLASSES #include "paddle/infrt/dialect/test_kernels.hpp.inc" -} // namespace infrt::dialect diff --git a/paddle/infrt/dialect/types.cc b/paddle/infrt/dialect/types.cc deleted file mode 100644 index 6d6f6a20b46c9..0000000000000 --- a/paddle/infrt/dialect/types.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/infrt/dialect/types.h" - -namespace infrt::hlir::mlir {} // namespace infrt::hlir::mlir diff --git a/paddle/infrt/dialect/types.h b/paddle/infrt/dialect/types.h deleted file mode 100644 index a9a2b61871cc0..0000000000000 --- a/paddle/infrt/dialect/types.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once -#include diff --git a/paddle/infrt/host_context/core_runtime.cc b/paddle/infrt/host_context/core_runtime.cc index cdb8cc99ecb26..e3917bd07d242 100644 --- a/paddle/infrt/host_context/core_runtime.cc +++ b/paddle/infrt/host_context/core_runtime.cc @@ -23,7 +23,8 @@ #include "paddle/infrt/host_context/op_executable.h" #include "paddle/infrt/host_context/symbol_table.h" -namespace infrt::host_context { +namespace infrt { +namespace host_context { struct CoreRuntime::Impl { KernelRegistry* kernel_registry{}; @@ -90,4 +91,5 @@ llvm::SmallVector CoreRuntime::GetResults( CoreRuntime::~CoreRuntime() {} -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt diff --git a/paddle/infrt/host_context/core_runtime.h b/paddle/infrt/host_context/core_runtime.h index 802f8b17bb010..acb6a66cac630 100644 --- a/paddle/infrt/host_context/core_runtime.h +++ b/paddle/infrt/host_context/core_runtime.h @@ -22,7 +22,8 @@ #include "paddle/infrt/host_context/value.h" -namespace infrt::host_context { +namespace infrt { +namespace host_context { class KernelRegistry; class OpExecutable; @@ -83,4 +84,5 @@ class CoreRuntimeBuilder : public CoreRuntime { OpExecutableBuilder* NewOpExecutable(const std::string& op_name); }; -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt diff --git a/paddle/infrt/host_context/kernel_frame.h b/paddle/infrt/host_context/kernel_frame.h index 20cb17dc7fbe2..5186b88fe2c41 100644 --- a/paddle/infrt/host_context/kernel_frame.h +++ b/paddle/infrt/host_context/kernel_frame.h @@ -21,7 +21,8 @@ #include "llvm/ADT/SmallVector.h" #include "paddle/infrt/host_context/value.h" -namespace infrt::host_context { +namespace infrt { +namespace host_context { /** * KernelFrame captures the states(input arguments, attributes, results) @@ -163,4 +164,5 @@ class KernelFrameBuilder : public KernelFrame { } }; -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt diff --git a/paddle/infrt/host_context/kernel_registry_test.cc b/paddle/infrt/host_context/kernel_registry_test.cc index f36ec2a1cac7d..7fca56343041c 100644 --- a/paddle/infrt/host_context/kernel_registry_test.cc +++ b/paddle/infrt/host_context/kernel_registry_test.cc @@ -18,7 +18,8 @@ #include "paddle/infrt/host_context/kernel_utils.h" -namespace infrt::host_context { +namespace infrt { +namespace host_context { int add_i32(int a, int b) { return a + b; } @@ -44,4 +45,5 @@ TEST(KernelRegistry, basic) { ASSERT_EQ(results[0]->get(), 3); } -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt diff --git a/paddle/infrt/host_context/kernel_utils_test.cc b/paddle/infrt/host_context/kernel_utils_test.cc index 1904eb106a293..bebd8d86e50bb 100644 --- a/paddle/infrt/host_context/kernel_utils_test.cc +++ b/paddle/infrt/host_context/kernel_utils_test.cc @@ -16,7 +16,8 @@ #include -namespace infrt::host_context { +namespace infrt { +namespace host_context { int add_i32(int a, int b) { return a + b; } float add_f32(float a, float b) { return a + b; } @@ -66,4 +67,5 @@ TEST(KernelImpl, pair) { ASSERT_EQ(results[1]->get(), 3.f); } -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt diff --git a/paddle/infrt/host_context/mlir_function_executable.cc b/paddle/infrt/host_context/mlir_function_executable.cc index 5f8dacf8e448a..47ec27ebec300 100644 --- a/paddle/infrt/host_context/mlir_function_executable.cc +++ b/paddle/infrt/host_context/mlir_function_executable.cc @@ -15,6 +15,7 @@ #include 
"paddle/infrt/host_context/mlir_function_executable.h" #include +#include #include // NOLINT diff --git a/paddle/infrt/host_context/mlir_function_executable.h b/paddle/infrt/host_context/mlir_function_executable.h index ba5fa154d6fcc..a6428df86e6b2 100644 --- a/paddle/infrt/host_context/mlir_function_executable.h +++ b/paddle/infrt/host_context/mlir_function_executable.h @@ -13,7 +13,8 @@ // limitations under the License. #pragma once -#include +#include +#include #include #include diff --git a/paddle/infrt/host_context/mlir_program_executor.h b/paddle/infrt/host_context/mlir_program_executor.h index b2af4d2d79db5..c2ccb90640b21 100644 --- a/paddle/infrt/host_context/mlir_program_executor.h +++ b/paddle/infrt/host_context/mlir_program_executor.h @@ -15,9 +15,9 @@ #pragma once #include +#include +#include #include -#include -#include #include #include diff --git a/paddle/infrt/host_context/mlir_to_runtime_translate.cc b/paddle/infrt/host_context/mlir_to_runtime_translate.cc index 25324b1291582..3dbc7a702be38 100644 --- a/paddle/infrt/host_context/mlir_to_runtime_translate.cc +++ b/paddle/infrt/host_context/mlir_to_runtime_translate.cc @@ -16,8 +16,9 @@ #include #include +#include +#include #include -#include #include #include @@ -40,7 +41,8 @@ #include "paddle/infrt/host_context/value.h" #include "paddle/infrt/tensor/tensor_shape.h" -namespace infrt::host_context { +namespace infrt { +namespace host_context { template std::string DumpToString(T& op) { // NOLINT @@ -113,10 +115,10 @@ bool MlirToRuntimeTranslator::EmitConstantOp(mlir::Operation* op) { template <> boost::optional MlirToRuntimeTranslator::EmitAttribute( - const mlir::Attribute* attr) { - if (!attr->isa()) return boost::none; - if (attr->isa()) { - auto val = attr->cast(); + const mlir::Attribute& attr) { + if (!attr.isa()) return boost::none; + if (attr.isa()) { + auto val = attr.cast(); if (val.getType().isInteger(32)) { return val.getInt(); } @@ -125,10 +127,10 @@ boost::optional MlirToRuntimeTranslator::EmitAttribute( } template <> boost::optional MlirToRuntimeTranslator::EmitAttribute( - const mlir::Attribute* attr) { - if (!attr->isa()) return boost::none; - if (attr->isa()) { - auto val = attr->cast(); + const mlir::Attribute& attr) { + if (!attr.isa()) return boost::none; + if (attr.isa()) { + auto val = attr.cast(); if (val.getType().isInteger(64)) { return val.getInt(); } @@ -139,10 +141,10 @@ boost::optional MlirToRuntimeTranslator::EmitAttribute( // TODO(Superjomn) Make double and float parsing share some thing. 
template <> boost::optional MlirToRuntimeTranslator::EmitAttribute( - const mlir::Attribute* attr) { - if (!attr->isa()) return boost::none; - if (attr->isa()) { - auto val = attr->cast(); + const mlir::Attribute& attr) { + if (!attr.isa()) return boost::none; + if (attr.isa()) { + auto val = attr.cast(); if (val.getType().isF32()) return val.getValueAsDouble(); } return boost::none; @@ -150,10 +152,10 @@ boost::optional MlirToRuntimeTranslator::EmitAttribute( template <> boost::optional MlirToRuntimeTranslator::EmitAttribute( - const mlir::Attribute* attr) { - if (!attr->isa()) return boost::none; - if (attr->isa()) { - auto val = attr->cast(); + const mlir::Attribute& attr) { + if (!attr.isa()) return boost::none; + if (attr.isa()) { + auto val = attr.cast(); if (val.getType().isF64()) return val.getValueAsDouble(); } return boost::none; @@ -161,17 +163,17 @@ boost::optional MlirToRuntimeTranslator::EmitAttribute( template <> boost::optional MlirToRuntimeTranslator::EmitAttribute( - const mlir::Attribute* attr) { - if (!attr->isa()) return boost::none; - return attr->cast().getValue().str(); + const mlir::Attribute& attr) { + if (!attr.isa()) return boost::none; + return attr.cast().getValue().str(); } #define PROCESS_ARRAY_INT(type__, bits__) \ template <> \ boost::optional> MlirToRuntimeTranslator::EmitAttribute( \ - const mlir::Attribute* attr) { \ - if (!attr->isa()) return boost::none; \ - auto array = attr->cast(); \ + const mlir::Attribute& attr) { \ + if (!attr.isa()) return boost::none; \ + auto array = attr.cast(); \ CHECK(!array.empty()); \ \ if (!array[0].getType().isInteger(bits__)) { \ @@ -191,9 +193,9 @@ PROCESS_ARRAY_INT(int64_t, 64); template <> boost::optional> MlirToRuntimeTranslator::EmitAttribute( - const mlir::Attribute* attr) { - if (!attr->isa()) return boost::none; - auto array = attr->cast(); + const mlir::Attribute& attr) { + if (!attr.isa()) return boost::none; + auto array = attr.cast(); CHECK(!array.empty()); if (!array[0].getType().isF32()) return boost::none; @@ -207,9 +209,9 @@ boost::optional> MlirToRuntimeTranslator::EmitAttribute( template <> boost::optional> MlirToRuntimeTranslator::EmitAttribute( - const mlir::Attribute* attr) { - if (!attr->isa()) return boost::none; - auto array = attr->cast(); + const mlir::Attribute& attr) { + if (!attr.isa()) return boost::none; + auto array = attr.cast(); CHECK(!array.empty()); if (!array[0].getType().isF64()) return boost::none; @@ -236,7 +238,8 @@ bool MlirToRuntimeTranslator::EmitGeneralOp(mlir::Operation* op) { for (int i = 0, e = op->getNumOperands(); i < e; i++) { // function argument as value auto operand = op->getOperand(i); - if (operand.getKind() == mlir::Value::Kind::BlockArgument) { + /// if (operand.getKind() == mlir::Value::Kind::BlockArgument) { + if (operand.isa()) { mlir::BlockArgument arg = operand.dyn_cast(); Value* arg_value = GetValue(arg); impl_->cur_op->AppendArgument(arg_value); @@ -283,25 +286,25 @@ bool MlirToRuntimeTranslator::EmitGeneralOp(mlir::Operation* op) { for (size_t i = 0; i < attrs.size(); i++) { auto& attr = attrs[i]; - if (auto v = EmitAttribute(&attr.second)) { + if (auto v = EmitAttribute(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(*v)); - } else if (auto v = EmitAttribute(&attr.second)) { + } else if (auto v = EmitAttribute(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(*v)); - } else if (auto v = EmitAttribute(&attr.second)) { + } else if (auto v = EmitAttribute(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(*v)); - } else 
if (auto v = EmitAttribute(&attr.second)) { + } else if (auto v = EmitAttribute(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(*v)); - } else if (auto v = EmitAttribute(&attr.second)) { + } else if (auto v = EmitAttribute(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(std::move(*v))); - } else if (auto v = EmitAttribute>(&attr.second)) { + } else if (auto v = EmitAttribute>(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(std::move(*v))); - } else if (auto v = EmitAttribute>(&attr.second)) { + } else if (auto v = EmitAttribute>(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(std::move(*v))); - } else if (auto v = EmitAttribute>(&attr.second)) { + } else if (auto v = EmitAttribute>(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(std::move(*v))); - } else if (auto v = EmitAttribute>(&attr.second)) { + } else if (auto v = EmitAttribute>(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(std::move(*v))); - } else if (auto v = EmitAttribute>(&attr.second)) { + } else if (auto v = EmitAttribute>(attr.getValue())) { impl_->cur_op->AppendAttribute(new Value(std::move(*v))); } else { LOG(FATAL) << "Not supported attribute type"; @@ -330,7 +333,7 @@ bool MlirToRuntimeTranslator::EmitGeneralOp(mlir::Operation* op) { llvm::SmallVector results; auto func_type = - mlir::FunctionType::get(inputs, results, region.getContext()); + mlir::FunctionType::get(region.getContext(), inputs, results); auto* function = impl_->cur_op->CreateFunctionExecutable( ®ion, func_type, &impl_->func_defs); impl_->cur_op->AppendAttribute(new Value(function)); @@ -555,4 +558,5 @@ void TestMlir(mlir::ModuleOp module, KernelRegistry* registry) { execute.Run(); } -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt diff --git a/paddle/infrt/host_context/mlir_to_runtime_translate.h b/paddle/infrt/host_context/mlir_to_runtime_translate.h index 598e81bfd96d8..fcd79eaf386ee 100644 --- a/paddle/infrt/host_context/mlir_to_runtime_translate.h +++ b/paddle/infrt/host_context/mlir_to_runtime_translate.h @@ -29,7 +29,8 @@ class Attribute; class Value; } // namespace mlir -namespace infrt::host_context { +namespace infrt { +namespace host_context { class CoreRuntimeBuilder; class Value; @@ -73,7 +74,7 @@ class MlirToRuntimeTranslator { bool EmitCallOp(mlir::Operation* op, function_defs_t* function_table); template - boost::optional EmitAttribute(const mlir::Attribute* attr); + boost::optional EmitAttribute(const mlir::Attribute& attr); Value* GetOpResult(mlir::Operation* op); @@ -104,4 +105,5 @@ void MlirToRuntimeTranslate(mlir::ModuleOp module, CoreRuntimeBuilder* runtime); */ void TestMlir(mlir::ModuleOp module, KernelRegistry* registry); -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt diff --git a/paddle/infrt/host_context/mlir_to_runtime_translate_test.cc b/paddle/infrt/host_context/mlir_to_runtime_translate_test.cc index 9b85be977ab6c..375daa4515e17 100644 --- a/paddle/infrt/host_context/mlir_to_runtime_translate_test.cc +++ b/paddle/infrt/host_context/mlir_to_runtime_translate_test.cc @@ -29,7 +29,8 @@ #include "paddle/infrt/kernel/tensor_shape_kernels.h" #include "paddle/infrt/kernel/test_kernels.h" -namespace infrt::host_context { +namespace infrt { +namespace host_context { TEST(MlirToRuntimeTranslate, basic) { mlir::MLIRContext context; @@ -48,7 +49,7 @@ func @main() -> () { )ROC"; auto module = dialect::LoadMlirSource(&context, source); - module->verify(); + 
   KernelRegistry registry;
   kernel::RegisterFloatBasicKernels(&registry);
@@ -74,7 +75,7 @@ func @main() -> () {
 )ROC";
 
   auto module = dialect::LoadMlirSource(&context, source);
-  module->verify();
+  EXPECT_TRUE(mlir::succeeded(module->verify()));
 
   KernelRegistry registry;
   kernel::RegisterFloatBasicKernels(&registry);
@@ -115,7 +116,7 @@ infrt.return %a0, %b0: !infrt.tensor<X86, NCHW, F32>, !infrt.tensor<X86, NCHW, F32>
 )ROC";
 
   auto module = dialect::LoadMlirSource(&context, source);
-  module->verify();
+  EXPECT_TRUE(mlir::succeeded(module->verify()));
 
   host_context::KernelRegistry registry;
@@ -157,4 +158,5 @@ infrt.return %a0, %b0: !infrt.tensor<X86, NCHW, F32>, !infrt.tensor<X86, NCHW, F32>
 
-}  // namespace infrt::host_context
+}  // namespace host_context
+}  // namespace infrt
diff --git a/paddle/infrt/host_context/op_executable.cc b/paddle/infrt/host_context/op_executable.cc
 #include
 
 #include "paddle/infrt/host_context/kernel_frame.h"
@@ -21,7 +22,8 @@
 #include "paddle/infrt/host_context/mlir_function_executable.h"
 #include "paddle/infrt/host_context/symbol_table.h"
 
-namespace infrt::host_context {
+namespace infrt {
+namespace host_context {
 
 struct OpExecutable::Impl {
   Impl(const std::string& op_name,
@@ -148,4 +150,5 @@ void OpExecutable::Execute() {
 
 OpExecutable::~OpExecutable() {}
 
-}  // namespace infrt::host_context
+}  // namespace host_context
+}  // namespace infrt
diff --git a/paddle/infrt/host_context/op_executable.h b/paddle/infrt/host_context/op_executable.h
index e2248225a5caf..550f6ab6349ed 100644
--- a/paddle/infrt/host_context/op_executable.h
+++ b/paddle/infrt/host_context/op_executable.h
@@ -14,19 +14,18 @@
 #pragma once
 #include
-
+#include
+#include
 #include
 #include
 #include
-#include "mlir/IR/Function.h"
-#include "mlir/IR/Region.h"
-
 namespace mlir {
 class FuncOp;
 }  // namespace mlir
 
-namespace infrt::host_context {
+namespace infrt {
+namespace host_context {
 
 class SymbolTable;
 class KernelRegistry;
@@ -89,4 +88,5 @@ class OpExecutableBuilder : public OpExecutable {
                       function_defs_t* function_defs);
 };
 
-}  // namespace infrt::host_context
+}  // namespace host_context
+}  // namespace infrt
diff --git a/paddle/infrt/kernel/basic_kernels.cc b/paddle/infrt/kernel/basic_kernels.cc
index d7f2c3865157d..b186cfcfd2b35 100644
--- a/paddle/infrt/kernel/basic_kernels.cc
+++ b/paddle/infrt/kernel/basic_kernels.cc
@@ -23,7 +23,8 @@
 using infrt::host_context::Attribute;
 
-namespace infrt::kernel {
+namespace infrt {
+namespace kernel {
 
 template <typename T>
 T add(T a, T b) {
@@ -82,4 +83,5 @@ void RegisterFloatBasicKernels(host_context::KernelRegistry *registry) {
   registry->AddKernel("infrt.print.f32", INFRT_KERNEL(print<float>));
 }
 
-}  // namespace infrt::kernel
+}  // namespace kernel
+}  // namespace infrt
diff --git a/paddle/infrt/kernel/basic_kernels.h b/paddle/infrt/kernel/basic_kernels.h
index 9e98885cf6ebf..feb66be61f530 100644
--- a/paddle/infrt/kernel/basic_kernels.h
+++ b/paddle/infrt/kernel/basic_kernels.h
@@ -15,13 +15,16 @@
 #pragma once
 #include
 
-namespace infrt::host_context {
+namespace infrt {
+namespace host_context {
 
 struct KernelRegistry;
 
-}  // namespace infrt::host_context
+}  // namespace host_context
+}  // namespace infrt
 
-namespace infrt::kernel {
+namespace infrt {
+namespace kernel {
 
 /**
  * Register all the basic kernels to \p registry.
@@ -31,4 +34,5 @@ void RegisterBasicKernels(host_context::KernelRegistry* registry); void RegisterIntBasicKernels(host_context::KernelRegistry* registry); void RegisterFloatBasicKernels(host_context::KernelRegistry* registry); -} // namespace infrt::kernel +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/tensor_kernels.cc b/paddle/infrt/kernel/tensor_kernels.cc index 2fa477aa4dbda..51e0004922374 100644 --- a/paddle/infrt/kernel/tensor_kernels.cc +++ b/paddle/infrt/kernel/tensor_kernels.cc @@ -25,7 +25,8 @@ #include "paddle/infrt/tensor/tensor_map.h" #include "paddle/infrt/tensor/tensor_shape.h" -namespace infrt::kernel { +namespace infrt { +namespace kernel { using namespace host_context; // NOLINT using namespace tensor; // NOLINT @@ -76,4 +77,5 @@ void RegisterTensorKernels(host_context::KernelRegistry *registry) { INFRT_KERNEL(ShallowCopyTensor)); } -} // namespace infrt::kernel +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/tensor_kernels.h b/paddle/infrt/kernel/tensor_kernels.h index 8f2180ba80a4f..df8e25c32393c 100644 --- a/paddle/infrt/kernel/tensor_kernels.h +++ b/paddle/infrt/kernel/tensor_kernels.h @@ -14,12 +14,16 @@ #pragma once -namespace infrt::host_context { +namespace infrt { +namespace host_context { struct KernelRegistry; -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt -namespace infrt::kernel { +namespace infrt { +namespace kernel { void RegisterTensorKernels(host_context::KernelRegistry* registry); -} // namespace infrt::kernel +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/tensor_shape_kernels.cc b/paddle/infrt/kernel/tensor_shape_kernels.cc index a04b492819298..4edbecfa10886 100644 --- a/paddle/infrt/kernel/tensor_shape_kernels.cc +++ b/paddle/infrt/kernel/tensor_shape_kernels.cc @@ -24,7 +24,8 @@ #include "paddle/infrt/host_context/kernel_utils.h" #include "paddle/infrt/tensor/tensor_shape.h" -namespace infrt::kernel { +namespace infrt { +namespace kernel { void PrintShape(const tensor::TensorShape& shape) { llvm::raw_os_ostream oos(std::cout); @@ -35,4 +36,5 @@ void RegisterTensorShapeKernels(host_context::KernelRegistry* registry) { registry->AddKernel("ts.print_shape", INFRT_KERNEL(PrintShape)); } -} // namespace infrt::kernel +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/tensor_shape_kernels.h b/paddle/infrt/kernel/tensor_shape_kernels.h index e87c6c37e88a0..e31a37463be43 100644 --- a/paddle/infrt/kernel/tensor_shape_kernels.h +++ b/paddle/infrt/kernel/tensor_shape_kernels.h @@ -14,14 +14,18 @@ #pragma once -namespace infrt::host_context { +namespace infrt { +namespace host_context { class KernelRegistry; -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt -namespace infrt::kernel { +namespace infrt { +namespace kernel { void RegisterTensorShapeKernels(host_context::KernelRegistry* registry); -} // namespace infrt::kernel +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/test_kernels.cc b/paddle/infrt/kernel/test_kernels.cc index d5f64d09b602f..ccfb3356a855f 100644 --- a/paddle/infrt/kernel/test_kernels.cc +++ b/paddle/infrt/kernel/test_kernels.cc @@ -33,7 +33,8 @@ using infrt::host_context::Attribute; using infrt::host_context::MlirFunctionExecutable; using infrt::host_context::RemainingArguments; -namespace infrt::kernel { +namespace infrt { +namespace kernel { namespace { class BenchmarkStats { public: @@ -197,4 +198,5 @@ void 
RegisterTestKernels(host_context::KernelRegistry *registry) { INFRT_KERNEL(ShadowCopyTensor)); } -} // namespace infrt::kernel +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/test_kernels.h b/paddle/infrt/kernel/test_kernels.h index f42884dfaf2c9..f5639ec1afaad 100644 --- a/paddle/infrt/kernel/test_kernels.h +++ b/paddle/infrt/kernel/test_kernels.h @@ -15,17 +15,21 @@ #pragma once #include -namespace infrt::host_context { +namespace infrt { +namespace host_context { struct KernelRegistry; -} // namespace infrt::host_context +} // namespace host_context +} // namespace infrt -namespace infrt::kernel { +namespace infrt { +namespace kernel { /** * Register all the test kernels to registry. */ void RegisterTestKernels(host_context::KernelRegistry* registry); -} // namespace infrt::kernel +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/paddle/cpp/desc_api.h b/paddle/infrt/paddle/cpp/desc_api.h index ccd79c048ab14..3b2dcb0018b2f 100644 --- a/paddle/infrt/paddle/cpp/desc_api.h +++ b/paddle/infrt/paddle/cpp/desc_api.h @@ -18,7 +18,9 @@ #include #include -namespace infrt::paddle::cpp { +namespace infrt { +namespace paddle { +namespace cpp { /* * Compatible interfaces for all the different kinds of XXXDesc. All the XXXDesc @@ -226,4 +228,6 @@ class ProgramDescAPI { virtual void SetVersion(int64_t version) = 0; }; -} // namespace infrt::paddle::cpp +} // namespace cpp +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/model_parser.cc b/paddle/infrt/paddle/model_parser.cc index 285280e69435b..f3de1a630451c 100644 --- a/paddle/infrt/paddle/model_parser.cc +++ b/paddle/infrt/paddle/model_parser.cc @@ -22,7 +22,8 @@ #include "paddle/infrt/common/target.h" #include "paddle/infrt/common/type.h" -namespace infrt::paddle { +namespace infrt { +namespace paddle { int SizeOfType(framework_proto::VarType::Type type) { using Type = framework_proto::VarType::Type; @@ -169,4 +170,5 @@ void LoadParam(const std::string &path, _Variable *out, const Target &target) { LoadLoDTensor(fin, out, target); } -} // namespace infrt::paddle +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/model_parser.h b/paddle/infrt/paddle/model_parser.h index 73125fadedb82..373f77033dcef 100644 --- a/paddle/infrt/paddle/model_parser.h +++ b/paddle/infrt/paddle/model_parser.h @@ -25,7 +25,8 @@ #include "paddle/infrt/paddle/scope.h" #include "paddle/infrt/paddle/tensor.h" -namespace infrt::paddle { +namespace infrt { +namespace paddle { namespace framework_proto = ::paddle::framework::proto; // Read a __model__ file. 
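[Editorial gloss, hedged] A recurring pattern in the infrt hunks above and below: `namespace infrt::host_context { ... }` is C++17 nested-namespace definition syntax, and this patch mechanically rewrites every occurrence to the pre-C++17 spelling. The motivation (compatibility with pre-C++17 toolchains) is inferred from the mechanical nature of the change; the commit message does not state it. Illustrative only:

// C++17 nested-namespace definition (the form the patch removes):
namespace infrt::host_context {
struct KernelRegistry;
}  // namespace infrt::host_context

// Pre-C++17 equivalent (the form the patch writes instead):
namespace infrt {
namespace host_context {
struct KernelRegistry;
}  // namespace host_context
}  // namespace infrt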
@@ -52,4 +53,5 @@ void TensorFromStream( const common::Target& target = common::DefaultHostTarget()); void ReadBinaryFile(const std::string& filename, std::string* contents); -} // namespace infrt::paddle +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/block_desc.cc b/paddle/infrt/paddle/pb/block_desc.cc index 11186bc68af16..5b28fa5464c54 100644 --- a/paddle/infrt/paddle/pb/block_desc.cc +++ b/paddle/infrt/paddle/pb/block_desc.cc @@ -14,7 +14,9 @@ #include "paddle/infrt/paddle/pb/block_desc.h" -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { template <> framework_proto::VarDesc* BlockDesc::GetVar( @@ -40,4 +42,6 @@ framework_proto::OpDesc* BlockDesc::AddOp() { return desc_->add_ops(); } -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/block_desc.h b/paddle/infrt/paddle/pb/block_desc.h index 9c1b7f9adf172..c9e325699a4bc 100644 --- a/paddle/infrt/paddle/pb/block_desc.h +++ b/paddle/infrt/paddle/pb/block_desc.h @@ -18,7 +18,9 @@ #include "paddle/infrt/paddle/cpp/desc_api.h" #include "paddle/infrt/paddle/framework.pb.h" -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { namespace framework_proto = ::paddle::framework::proto; @@ -74,4 +76,6 @@ class BlockDesc : public cpp::BlockDescAPI { framework_proto::BlockDesc* desc_; // not_own }; -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/op_desc.cc b/paddle/infrt/paddle/pb/op_desc.cc index c7b1e66f50642..32dcefb1ac684 100644 --- a/paddle/infrt/paddle/pb/op_desc.cc +++ b/paddle/infrt/paddle/pb/op_desc.cc @@ -14,7 +14,9 @@ #include "paddle/infrt/paddle/pb/op_desc.h" -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { google::protobuf::internal::RepeatedPtrIterator FindAttr(framework_proto::OpDesc *desc, const std::string &name) { @@ -136,4 +138,6 @@ GET_ATTRS_IMPL(std::vector, strings); GET_ATTR_IMPL(std::string, s); GET_ATTRS_IMPL(std::vector, longs); -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/op_desc.h b/paddle/infrt/paddle/pb/op_desc.h index 81d57d9f32252..2829f2aca2e08 100644 --- a/paddle/infrt/paddle/pb/op_desc.h +++ b/paddle/infrt/paddle/pb/op_desc.h @@ -19,7 +19,9 @@ #include "paddle/infrt/paddle/framework.pb.h" #include "paddle/infrt/support/variant.h" -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { namespace framework_proto = ::paddle::framework::proto; @@ -195,4 +197,6 @@ template <> void OpDesc::SetAttr>(const std::string &name, const std::vector &v); -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/program_desc.cc b/paddle/infrt/paddle/pb/program_desc.cc index ed8a7e36e0129..9d725485a974d 100644 --- a/paddle/infrt/paddle/pb/program_desc.cc +++ b/paddle/infrt/paddle/pb/program_desc.cc @@ -17,7 +17,9 @@ #include #include -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { template <> framework_proto::BlockDesc* ProgramDesc::GetBlock( @@ -32,4 +34,6 @@ ProgramDesc::AddBlock() { return desc_->add_blocks(); } -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/program_desc.h b/paddle/infrt/paddle/pb/program_desc.h 
index 4adad650c974d..b1e64f8e86611 100644 --- a/paddle/infrt/paddle/pb/program_desc.h +++ b/paddle/infrt/paddle/pb/program_desc.h @@ -21,7 +21,9 @@ #include "paddle/infrt/paddle/cpp/desc_api.h" #include "paddle/infrt/paddle/framework.pb.h" -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { namespace framework_proto = ::paddle::framework::proto; class ProgramDesc : public cpp::ProgramDescAPI { @@ -58,4 +60,6 @@ class ProgramDesc : public cpp::ProgramDescAPI { framework_proto::ProgramDesc *desc_; // not_own }; -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/var_desc.cc b/paddle/infrt/paddle/pb/var_desc.cc index cf80df4f1b845..7ea2e24da3446 100644 --- a/paddle/infrt/paddle/pb/var_desc.cc +++ b/paddle/infrt/paddle/pb/var_desc.cc @@ -19,7 +19,9 @@ #include "paddle/infrt/paddle/cpp/desc_api.h" #include "paddle/infrt/paddle/framework.pb.h" -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { cpp::VarDescAPI::Type VarDesc::GetType() const { auto type = desc_->type().type(); @@ -364,4 +366,6 @@ VarDesc::mutable_tensor_descs() { return std::vector(); } -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt diff --git a/paddle/infrt/paddle/pb/var_desc.h b/paddle/infrt/paddle/pb/var_desc.h index 4cff5fdee0375..7215ba6bb6aa7 100644 --- a/paddle/infrt/paddle/pb/var_desc.h +++ b/paddle/infrt/paddle/pb/var_desc.h @@ -23,7 +23,9 @@ #include "paddle/infrt/paddle/cpp/desc_api.h" #include "paddle/infrt/paddle/framework.pb.h" -namespace infrt::paddle::pb { +namespace infrt { +namespace paddle { +namespace pb { namespace framework_proto = ::paddle::framework::proto; // convert between std::vector and protobuf repeated. @@ -121,4 +123,6 @@ class VarDesc : public cpp::VarDescAPI { framework_proto::VarDesc *desc_; }; -} // namespace infrt::paddle::pb +} // namespace pb +} // namespace paddle +} // namespace infrt From 87ee3e4f5438c567796e128b73eb7703aa56d2ec Mon Sep 17 00:00:00 2001 From: Zhangjingyu06 <92561254+Zhangjingyu06@users.noreply.github.com> Date: Fri, 14 Jan 2022 16:15:47 +0800 Subject: [PATCH 03/10] [XPU]add stack_grad op for kunlun2,*test=kunlun (#38674) * [XPU]add split op for kunlun2,*test=kunlun * [XPU]add split op for kunlun2,*test=kunlun * [XPU]add split op for kunlun,*test=kunlun * [XPU]add stack_grad op for kunlun2,*test=kunlun Co-authored-by: QingshuChen --- paddle/fluid/operators/stack_op_xpu.cc | 43 ++++++++++++++++--- .../fluid/platform/device/xpu/xpu1_op_list.h | 1 + .../fluid/platform/device/xpu/xpu2_op_list.h | 2 + .../tests/unittests/xpu/test_stack_op_xpu.py | 19 +++++++- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/operators/stack_op_xpu.cc b/paddle/fluid/operators/stack_op_xpu.cc index 01ec4a2b16b4a..a2590e1180c1a 100644 --- a/paddle/fluid/operators/stack_op_xpu.cc +++ b/paddle/fluid/operators/stack_op_xpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/stack_op.h"
 #include
-#ifdef PADDLE_WITH_XPU
+#include
+#include "paddle/fluid/operators/concat_op.h"
+#include "paddle/fluid/platform/device/xpu/xpu_header.h"
 
 namespace paddle {
 namespace operators {
@@ -59,14 +62,44 @@ class StackXPUKernel : public framework::OpKernel<T> {
   }
 };
 
+template <typename DeviceContext, typename T>
+class StackGradXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* dy = ctx.Input<framework::Tensor>(framework::GradVarName("Y"));
+    auto dx = ctx.MultiOutput<framework::Tensor>(framework::GradVarName("X"));
+    auto axis = ctx.Attr<int>("axis");
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    auto dy_dims = dy->dims();
+
+    if (axis < 0) axis += dy_dims.size() + 1;
+    auto dy_shape = framework::vectorize<int>(dy_dims);
+
+    std::vector<int> dx_dims_list(dx.size(), 1);
+    std::vector<T*> dx_lists;
+    for (auto out : dx) {
+      dx_lists.push_back(out->mutable_data<T>(ctx.GetPlace()));
+    }
+
+    int r = xpu::split<T>(dev_ctx.x_context(), dy->data<T>(), dx_lists,
+                          dy_shape, dx_dims_list, axis);
+    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
+                      platform::errors::External(
+                          "The stack_grad XPU kernel return wrong value[%d %s]",
+                          r, XPUAPIErrorMsg[r]));
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace plat = paddle::platform;
 namespace ops = paddle::operators;
-
 REGISTER_OP_XPU_KERNEL(stack,
-                       ops::StackXPUKernel,
+                       ops::StackXPUKernel,
                        ops::StackXPUKernel,
-                       ops::StackXPUKernel);
+                       ops::StackXPUKernel);
+REGISTER_OP_XPU_KERNEL(stack_grad,
+                       ops::StackGradXPUKernel<plat::XPUDeviceContext, float>,
+                       ops::StackGradXPUKernel<plat::XPUDeviceContext, int>);
 #endif
diff --git a/paddle/fluid/platform/device/xpu/xpu1_op_list.h b/paddle/fluid/platform/device/xpu/xpu1_op_list.h
index 26a1426bea036..a76bdd4ae9679 100644
--- a/paddle/fluid/platform/device/xpu/xpu1_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu1_op_list.h
@@ -300,6 +300,7 @@ XPUOpMap& get_kl1_ops() {
                      pOpKernelType(vartype::UINT8, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace())})},
     {"stack", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"stack_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"tanh_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"tanh", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index 79261a5d7bc88..3d140b4693a6f 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -333,6 +333,8 @@ XPUOpMap& get_kl2_ops() {
     {"stack", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                             pOpKernelType(vartype::INT64, XPUPlace()),
                             pOpKernelType(vartype::INT32, XPUPlace())})},
+    {"stack_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
+                                 pOpKernelType(vartype::INT32, XPUPlace())})},
     {"sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                           pOpKernelType(vartype::FP16, XPUPlace())})},
     {"tanh_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
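[Editorial gloss, hedged] The StackGradXPUKernel above relies on the identity grad(stack) = split: the upstream gradient dY is cut back into one slice per stacked input along the stack axis, which is why the kernel can delegate wholesale to xpu::split. A standalone reference for the index math (illustrative names only, not the XPU API):

// Reference semantics for stack's gradient. dY has shape [pre, n, post]
// flattened row-major; the result is n slices of shape [pre, post] — exactly
// what splitting dY at the stack axis yields.
#include <cassert>
#include <vector>

std::vector<std::vector<float>> StackGradReference(
    const std::vector<float>& dy, int pre, int n, int post) {
  assert(static_cast<int>(dy.size()) == pre * n * post);
  std::vector<std::vector<float>> dx(n, std::vector<float>(pre * post));
  for (int p = 0; p < pre; ++p)
    for (int i = 0; i < n; ++i)
      for (int q = 0; q < post; ++q)
        dx[i][p * post + q] = dy[(p * n + i) * post + q];
  return dx;
}

int main() {
  // Two length-2 tensors stacked at axis 0 -> dY of shape [2, 2]
  // (pre = 1, n = 2, post = 2); the gradient splits cleanly back apart.
  std::vector<float> dy = {1, 2, 3, 4};
  auto dx = StackGradReference(dy, /*pre=*/1, /*n=*/2, /*post=*/2);
  assert(dx[0][0] == 1 && dx[0][1] == 2 && dx[1][0] == 3 && dx[1][1] == 4);
  return 0;
}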
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py
index 68e5a6ccdbfb7..20446aee41ec7 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -66,6 +66,15 @@ def test_check_output(self):
             place = paddle.XPUPlace(0)
             self.check_output_with_place(place)
 
+    def test_check_grad(self):
+        if self.dtype == 'int64' or self.dtype == 'int32':
+            pass
+        else:
+            if paddle.is_compiled_with_xpu():
+                paddle.enable_static()
+                place = paddle.XPUPlace(0)
+                self.check_grad_with_place(place, self.get_x_names(), 'Y')
+
 
 class TestStackOp1(TestStackOpBase):
     def initParameters(self):
@@ -81,11 +90,17 @@ class TestStackOp3(TestStackOpBase):
     def initParameters(self):
         self.axis = -1
 
+    def test_check_grad(self):
+        pass
+
 
 class TestStackOp4(TestStackOpBase):
     def initParameters(self):
         self.axis = -4
 
+    def test_check_grad(self):
+        pass
+
 
 class TestStackOp5(TestStackOpBase):
     def initParameters(self):
@@ -113,7 +128,7 @@ def initDefaultParameters(self):
         self.num_inputs = 4
         self.input_dim = (5, 6, 7)
         self.axis = 0
-        self.dtype = 'int'
+        self.dtype = 'int32'
 
     def initParameters(self):
         self.num_inputs = 16
From 050aa6fe5a524b0e7b85201c54a0da315701518d Mon Sep 17 00:00:00 2001
From: heliqi
Date: Fri, 14 Jan 2022 16:50:56 +0800
Subject: [PATCH 04/10] add flatten_contiguous_range OpConvert for Paddle-TRT
 (#38922)

* add trt_convert_flatten_contiguous_range op

* trt version > 7: support trt_convert_flatten_contiguous_range

* test case: skip when trt version < 7

---
 .../ir_passes/tensorrt_subgraph_pass.cc       |   7 +-
 .../fluid/inference/api/analysis_predictor.cc |   1 +
 .../inference/tensorrt/convert/CMakeLists.txt |   2 +-
 .../convert/flatten_contiguous_range_op.cc    | 136 ++++++++++++++++++
 paddle/fluid/inference/tensorrt/op_teller.cc  |  32 +++++
 ...st_trt_convert_flatten_contiguous_range.py | 115 +++++++++++++++
 6 files changed, 290 insertions(+), 3 deletions(-)
 create mode 100644 paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py

diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index ef50df3084f8c..55bbc55450876 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -46,8 +46,11 @@ void analysis::TensorRtSubgraphPass::ApplyImpl(
               << " is disabled by config in TensorRT";
       return false;
     }
-    return tensorrt::OpTeller::Global().Tell(node, no_calib_int8,
-                                             with_dynamic_shape);
+    bool is_ok = tensorrt::OpTeller::Global().Tell(node, no_calib_int8,
+                                                   with_dynamic_shape);
+    if (!is_ok)
+      VLOG(3) << node->Op()->Type().c_str() << " op is not in TensorRT";
+    return is_ok;
   };
 
   framework::ir::SubGraphFuser fuser(
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 2799fb9e174d3..d4b680288e347 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1416,6 +1416,7 @@ USE_TRT_CONVERTER(elementwise_min_tensor);
 USE_TRT_CONVERTER(elementwise_pow_tensor);
 USE_TRT_CONVERTER(transpose);
 USE_TRT_CONVERTER(flatten);
+USE_TRT_CONVERTER(flatten_contiguous_range);
 USE_TRT_CONVERTER(matmul);
 USE_TRT_CONVERTER(conv2d);
 USE_TRT_CONVERTER(relu);
diff --git
a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index a885b69fa7fbc..017caca6adc81 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -3,7 +3,7 @@ nv_library(tensorrt_converter SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc group_norm_op.cc pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc - shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc + shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc flatten_contiguous_range_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc gather_op.cc anchor_generator_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc b/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc new file mode 100644 index 0000000000000..706814340a0e9 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc @@ -0,0 +1,136 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace inference { +namespace tensorrt { +/* + * flatten_contiguous_range trt converter + */ +class FlattenContiguousRangeOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + framework::OpDesc op_desc(op, nullptr); + // Declare inputs + auto* input = engine_->GetITensor(op_desc.Input("X")[0]); + int dims = input->getDimensions().nbDims; + int start_axis = BOOST_GET_CONST(int, op_desc.GetAttr("start_axis")); + int stop_axis = BOOST_GET_CONST(int, op_desc.GetAttr("stop_axis")); + + nvinfer1::IShuffleLayer* layer = nullptr; + if (!engine_->with_dynamic_shape()) { + if (start_axis < 0) start_axis += dims + 1; + if (stop_axis < 0) stop_axis += dims + 1; + int dim_prod = 1; + nvinfer1::Dims flatten_dim; + flatten_dim.nbDims = dims - (stop_axis - start_axis); + for (int i = 0, j = 0; i < dims; ++i) { + if (start_axis <= i + 1 && i + 1 <= stop_axis) { + int dim_i = input->getDimensions().d[i]; + PADDLE_ENFORCE_GT(dim_i, 0, platform::errors::InvalidArgument( + "flatten_contiguous_range input dim " + "should be > 0, but got %d.", + dim_i)); + dim_prod *= dim_i; + if (i + 1 == stop_axis) { + flatten_dim.d[j++] = dim_prod; + } + } else { + flatten_dim.d[j++] = input->getDimensions().d[i]; + } + } + layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + layer->setReshapeDimensions(flatten_dim); + } else { + if (start_axis < 0) start_axis += dims; + if (stop_axis < 0) stop_axis += dims; + auto* shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input); + auto* shape_layer_itensor = shape_layer->getOutput(0); + + nvinfer1::Dims start_dim, size_dim, stride_dim; + start_dim.nbDims = 1; + size_dim.nbDims = 1; + stride_dim.nbDims = 1; + start_dim.d[0] = start_axis; + size_dim.d[0] = stop_axis - start_axis + 1; + stride_dim.d[0] = 1; + auto* slice_layer = + TRT_ENGINE_ADD_LAYER(engine_, Slice, *shape_layer_itensor, start_dim, + size_dim, stride_dim); + uint32_t reduce_dim = 1; + auto* reduce_prod_layer = TRT_ENGINE_ADD_LAYER( + engine_, Reduce, *(slice_layer->getOutput(0)), + nvinfer1::ReduceOperation::kPROD, reduce_dim, true); + + nvinfer1::ITensor* input_shape = nullptr; + if (start_axis == 0 && stop_axis == dims - 1) { + input_shape = reduce_prod_layer->getOutput(0); + } else { + std::vector itensors; + if (start_axis > 0) { + nvinfer1::Dims left_start_dim, left_size_dim, left_stride_dim; + left_start_dim.nbDims = 1; + left_size_dim.nbDims = 1; + left_stride_dim.nbDims = 1; + left_start_dim.d[0] = 0; + left_size_dim.d[0] = start_axis; + left_stride_dim.d[0] = 1; + auto* slice_layer_left = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *shape_layer_itensor, left_start_dim, + left_size_dim, left_stride_dim); + itensors.push_back(slice_layer_left->getOutput(0)); + } + itensors.push_back(reduce_prod_layer->getOutput(0)); + if (stop_axis < dims - 1) { + nvinfer1::Dims right_start_dim, right_size_dim, right_stride_dim; + right_start_dim.nbDims = 1; + right_size_dim.nbDims = 1; + right_stride_dim.nbDims = 1; + right_start_dim.d[0] = stop_axis + 1; + right_size_dim.d[0] = dims - stop_axis - 1; + right_stride_dim.d[0] = 1; + auto* slice_layer_right = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *shape_layer_itensor, right_start_dim, + right_size_dim, 
right_stride_dim);
+          itensors.push_back(slice_layer_right->getOutput(0));
+        }
+        auto* concat_layer = TRT_ENGINE_ADD_LAYER(
+            engine_, Concatenation, itensors.data(), itensors.size());
+        concat_layer->setAxis(0);
+        input_shape = concat_layer->getOutput(0);
+      }
+      layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      layer->setInput(1, *input_shape);
+    }
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "flatten_contiguous_range", {output_name},
+                             test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(flatten_contiguous_range,
+                          FlattenContiguousRangeOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index ddee4e0d682b0..6663103d4ca37 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -55,6 +55,7 @@ struct SimpleOpTypeSetTeller : public Teller {
 // #endif
 #if IS_TRT_VERSION_GE(7000)
     teller_set.insert("tile");
+    teller_set.insert("flatten_contiguous_range");
 #endif
 #if CUDA_VERSION >= 10020
     teller_set.insert("reshape");
@@ -531,6 +532,37 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         if (axis != 1) return false;
       }
     }
+    if (op_type == "flatten_contiguous_range") {
+      if (!with_dynamic_shape) {
+        int start_axis = BOOST_GET_CONST(int, desc.GetAttr("start_axis"));
+        int stop_axis = BOOST_GET_CONST(int, desc.GetAttr("stop_axis"));
+        auto x_var_name = desc.Input("X")[0];
+        auto* block = desc.Block();
+        if (block == nullptr) {
+          VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                     "Developers need to check whether block_desc is passed in "
+                     "the pass.";
+          return false;
+        }
+        auto* x_var_desc = block->FindVar(x_var_name);
+        const auto x_shape = x_var_desc->GetShape();
+        int dims = x_shape.size();
+        if (start_axis < 0) start_axis += dims;
+        if (start_axis == 0) {
+          VLOG(3) << "TRT flatten_contiguous_range not support the "
+                     "batch-dimension being changed";
+          return false;
+        }
+        if (stop_axis < 0) stop_axis += dims;
+        for (int i = start_axis; i <= stop_axis; ++i) {
+          if (x_shape[i] < 0) {
+            VLOG(3) << "On TRT static shape,flatten_contiguous_range input dim "
+                       "should be > 0";
+            return false;
+          }
+        }
+      }
+    }
 
     if (op_type == "gather") {
       auto gather_inputs = desc.Inputs();
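[Editorial gloss, hedged] Both branches of the converter above — the static one that multiplies known dims, and the dynamic one built from Shape/Slice/Reduce/Concat layers — implement the same shape rule: collapse dims [start_axis, stop_axis] into their product and leave the rest untouched. A standalone sketch of that rule (not TRT or Paddle code):

// Reference shape transform for flatten_contiguous_range.
#include <cassert>
#include <vector>

std::vector<int> FlattenContiguousRange(const std::vector<int>& shape,
                                        int start_axis, int stop_axis) {
  int dims = static_cast<int>(shape.size());
  if (start_axis < 0) start_axis += dims;  // same negative-axis rule as the op
  if (stop_axis < 0) stop_axis += dims;
  std::vector<int> out(shape.begin(), shape.begin() + start_axis);
  int prod = 1;
  for (int i = start_axis; i <= stop_axis; ++i) prod *= shape[i];
  out.push_back(prod);
  out.insert(out.end(), shape.begin() + stop_axis + 1, shape.end());
  return out;
}

int main() {
  // Mirrors the unit test below: input [2, batch, 4, 8, 3] with batch = 4.
  auto out = FlattenContiguousRange({2, 4, 4, 8, 3}, /*start=*/1, /*stop=*/3);
  assert((out == std::vector<int>{2, 128, 3}));  // 4 * 4 * 8 = 128
  return 0;
}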
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py
new file mode 100644
index 0000000000000..a4060349d4bed
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import unittest
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(batch):
+            return np.random.random([2, batch, 4, 8, 3]).astype(np.float32)
+
+        for batch in [1, 2, 4]:
+            for start_axis in range(5):
+                for stop_axis in range(start_axis, 5):
+                    type = "flatten_contiguous_range"
+                    op_outputs = {
+                        "Out": ["output_data"],
+                        "XShape": ["xshape_data"]
+                    }
+                    ops_config = [{
+                        "op_type": type,
+                        "op_inputs": {
+                            "X": ["input_data"]
+                        },
+                        "op_outputs": op_outputs,
+                        "op_attrs": {
+                            "start_axis": start_axis,
+                            "stop_axis": stop_axis,
+                        }
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data": TensorConfig(
+                                data_gen=partial(generate_input, batch))
+                        },
+                        outputs=["output_data"])
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {"input_data": [2, 1, 4, 8, 3]}
+            self.dynamic_shape.max_input_shape = {"input_data": [2, 4, 4, 8, 3]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [2, 2, 4, 8, 3]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            ver = paddle_infer.get_trt_compile_version()
+            if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 >= 7000:
+                if dynamic_shape:
+                    return 1, 2
+                else:
+                    if attrs[0]['start_axis'] == 0:
+                        return 0, 3
+                    else:
+                        return 1, 2
+            else:
+                return 0, 3
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
From a88791481484ab6a61540a737336d79c65d021dc Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Sat, 15 Jan 2022 12:39:49 +0800
Subject: [PATCH 05/10] fix performance problem caused by Conj (#38939)

---
 paddle/pten/kernels/complex_kernel.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/paddle/pten/kernels/complex_kernel.h b/paddle/pten/kernels/complex_kernel.h
index b6074f117ea14..d12fc730fef87 100644
--- a/paddle/pten/kernels/complex_kernel.h
+++ b/paddle/pten/kernels/complex_kernel.h
@@ -14,6 +14,7 @@ limitations under the License.
 */
 
 #pragma once
 
+#include "paddle/fluid/platform/complex.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/infermeta/unary.h"
 #include "paddle/pten/kernels/empty_kernel.h"
@@ -23,7 +24,13 @@ namespace pten {
 
 template <typename T, typename Context>
 void ConjKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out);
 
-template <typename T, typename Context>
+// If T is complex
+template <typename T, typename Context,
+          std::enable_if_t<std::is_same<T, paddle::platform::complex<float>>::value ||
+                               std::is_same<T, paddle::platform::complex<double>>::value,
+                           bool> = true>
 DenseTensor Conj(const Context& dev_ctx, const DenseTensor& x) {
   auto out_meta = UnchangedInferMeta(x.meta());
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
@@ -31,4 +38,15 @@ DenseTensor Conj(const Context& dev_ctx, const DenseTensor& x) {
   return dense_out;
 }
 
+// If T is not complex
+template <typename T, typename Context,
+          std::enable_if_t<!std::is_same<T, paddle::platform::complex<float>>::value &&
+                               !std::is_same<T, paddle::platform::complex<double>>::value,
+                           bool> = true>
+DenseTensor Conj(const Context& dev_ctx, const DenseTensor& x) {
+  return x;
+}
+
 }  // namespace pten
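[Editorial gloss, hedged] The performance fix above works by compile-time dispatch: for non-complex element types, Conj degenerates to returning its input unchanged, so no conjugation kernel is launched and no output tensor is allocated. A minimal standalone illustration of the same enable_if pattern (toy scalar types, not the pten API):

#include <complex>
#include <iostream>
#include <type_traits>

// Complex overload: does real work.
template <typename T,
          std::enable_if_t<std::is_same<T, std::complex<float>>::value ||
                               std::is_same<T, std::complex<double>>::value,
                           bool> = true>
T Conj(const T& v) {
  return std::conj(v);
}

// Everything else: conjugation is the identity, so hand the input back as-is —
// this is exactly how the patch avoids a needless kernel launch and copy.
template <typename T,
          std::enable_if_t<!std::is_same<T, std::complex<float>>::value &&
                               !std::is_same<T, std::complex<double>>::value,
                           bool> = true>
T Conj(const T& v) {
  return v;
}

int main() {
  std::cout << Conj(std::complex<double>(1, 2)) << "\n";  // (1,-2)
  std::cout << Conj(3.5) << "\n";                         // 3.5, untouched
  return 0;
}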
From 88966b283952096f81aab4918b7d83b303aabad2 Mon Sep 17 00:00:00 2001
From: Zhanlue Yang
Date: Sat, 15 Jan 2022 14:35:33 +0800
Subject: [PATCH 06/10] [Unify Tensors PR #7] Merged LoDTensor with Tensor,
 test=allcases (#38880)

* Merged LoDTensor with Tensor, test=allcases

* Patched python level LoDTensor

* Fixed example code failure

* Polished function names, removed duplicated forward declarations

---
 paddle/fluid/distributed/fleet.h              |   2 +-
 .../fluid/distributed/service/brpc_utils.cc   |   2 +-
 .../test/brpc_service_dense_sgd_test.cc       |   2 +-
 .../test/brpc_service_sparse_sgd_test.cc      |   2 +-
 paddle/fluid/framework/data_feed.h            |   2 +-
 .../framework/details/fetch_async_op_handle.h |   2 +-
 .../framework/details/variable_visitor.cc     |   2 +-
 paddle/fluid/framework/device_worker.cc       |   2 +-
 paddle/fluid/framework/device_worker.h        |   3 +-
 paddle/fluid/framework/downpour_worker.cc     |   2 +-
 paddle/fluid/framework/feed_fetch_method.cc   |   2 +-
 paddle/fluid/framework/feed_fetch_method.h    |   2 +-
 .../ir/conv_affine_channel_fuse_pass.cc       |   2 +-
 .../fluid/framework/ir/conv_bn_fuse_pass.cc   |   2 +-
 .../framework/ir/delete_dropout_op_pass.cc    |   2 +-
 .../ir/delete_quant_dequant_op_pass.cc        |   2 +-
 .../ir/fusion_group/code_generator_tester.cc  |   2 +-
 paddle/fluid/framework/lod_tensor.cc          |  28 --
 paddle/fluid/framework/lod_tensor.h           |  24 +-
 paddle/fluid/framework/naive_executor.h       |   2 +-
 paddle/fluid/framework/operator.cc            |  12 +-
 paddle/fluid/framework/operator.h             |   6 -
 paddle/fluid/framework/pull_dense_worker.cc   |   2 +-
 paddle/fluid/framework/tensor.h               |   9 +-
 paddle/fluid/framework/tensor_util.cc         |  24 ++
 paddle/fluid/framework/tensor_util.h          |   4 +-
 paddle/fluid/framework/trainer.h              |   2 +-
 paddle/fluid/framework/var_type_traits.h      |   7 +-
 paddle/fluid/inference/api/api_impl.h         |   2 +-
 .../api/details/reset_tensor_array.h          |   2 +-
 paddle/fluid/operators/assert_op.cc           |   2 +-
 paddle/fluid/operators/assign_op.h            |   2 +-
 .../operators/controlflow/while_op_helper.h   |   2 +-
 paddle/fluid/operators/math/beam_search.cc    |   1 -
 .../fluid/operators/math/beam_search_npu.cc   |   1 -
 .../fluid/operators/math/sequence_padding.cc  |   1 -
 paddle/fluid/operators/math/sequence_scale.cc |   2 +-
 paddle/fluid/operators/math/sequence_scale.h  |   2 +-
 paddle/fluid/operators/memcpy_d2h_op.h        |   2 +-
 paddle/fluid/operators/memcpy_h2d_op.h        |   2 +-
 paddle/fluid/operators/memcpy_op.h            |   2 +-
 paddle/fluid/operators/merge_lod_tensor_op.cc |   2 +-
 paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc |   1 -
 paddle/fluid/operators/print_op.cc            |   2 +-
 paddle/fluid/operators/recurrent_op.cc        |   2 +-
 .../reorder_lod_tensor_by_rank_op.cc          |   2 +-
 paddle/fluid/operators/split_lod_tensor_op.cc |   2 +-
 paddle/fluid/operators/tensor_formatter.h     |   2 +-
 paddle/fluid/operators/transfer_layout_op.h   |   2 +-
 paddle/fluid/platform/lodtensor_printer.cc    |   2 +-
 paddle/fluid/pybind/pybind.cc                 | 277 ++++++------
 paddle/pten/api/lib/utils/tensor_utils.cc     |  51 ++--
 paddle/pten/api/lib/utils/tensor_utils.h      |  18 +-
 python/paddle/fluid/__init__.py               |   5 +
 54 files changed, 203 insertions(+), 343 deletions(-)

diff --git a/paddle/fluid/distributed/fleet.h b/paddle/fluid/distributed/fleet.h
index 6d9ce01535e9d..697dbb9170f18 100644
--- a/paddle/fluid/distributed/fleet.h
+++ b/paddle/fluid/distributed/fleet.h
@@ -36,7 +36,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Scope;
 class SelectedRows;
 class Variable;
diff --git a/paddle/fluid/distributed/service/brpc_utils.cc b/paddle/fluid/distributed/service/brpc_utils.cc
index 6eb8462977b60..db55c9ad438a7 100644
--- a/paddle/fluid/distributed/service/brpc_utils.cc
+++ b/paddle/fluid/distributed/service/brpc_utils.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 class Variable;
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
index 68d1d457500c7..c0c1fda4c4fca 100644
--- a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
@@ -31,7 +31,7 @@ class PSClient;
 class PSServer;
 }  // namespace distributed
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
index 8fb3434af6e28..471750feaefef 100644
--- a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
@@ -32,7 +32,7 @@ class PSClient;
 class PSServer;
 }  // namespace distributed
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h
index a4100e66e7285..2533acaa6d35a 100644
--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -50,7 +50,7 @@ DECLARE_bool(enable_slotrecord_reset_shrink);
 namespace paddle {
 namespace framework {
 class DataFeedDesc;
-class LoDTensor;
+class Tensor;
 class Scope;
 class Variable;
 }  // namespace framework
diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.h b/paddle/fluid/framework/details/fetch_async_op_handle.h
index f863cc304b8a5..41df0d90aaf81 100644
--- a/paddle/fluid/framework/details/fetch_async_op_handle.h
+++ b/paddle/fluid/framework/details/fetch_async_op_handle.h
@@ -24,7 +24,7 @@
 namespace paddle {
 namespace framework {
 
-class LoDTensor;
+class Tensor;
 
 namespace ir {
 class Node;
diff --git a/paddle/fluid/framework/details/variable_visitor.cc b/paddle/fluid/framework/details/variable_visitor.cc
index 71e5dd28eded1..56c88e9d25a91 100644
--- a/paddle/fluid/framework/details/variable_visitor.cc
+++ b/paddle/fluid/framework/details/variable_visitor.cc
@@ -18,7 +18,7 @@
 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/device_worker.cc b/paddle/fluid/framework/device_worker.cc
index
fbaae5a21c274..3b70ef737f5be 100644 --- a/paddle/fluid/framework/device_worker.cc +++ b/paddle/fluid/framework/device_worker.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Scope; void DeviceWorker::SetRootScope(Scope* root_scope) { root_scope_ = root_scope; } diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h index 15acedf3cf50a..332a584049127 100644 --- a/paddle/fluid/framework/device_worker.h +++ b/paddle/fluid/framework/device_worker.h @@ -43,10 +43,9 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class ProgramDesc; class Scope; -class Tensor; } // namespace framework namespace platform { class DeviceContext; diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc index 11f70acb73aa7..cc97af4b1969d 100644 --- a/paddle/fluid/framework/downpour_worker.cc +++ b/paddle/fluid/framework/downpour_worker.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index 2eac65c90c02f..0c3aafd85f283 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; void SetFeedVariable(Scope* scope, const LoDTensor& input, diff --git a/paddle/fluid/framework/feed_fetch_method.h b/paddle/fluid/framework/feed_fetch_method.h index 4c2f5b9796a22..dc9310ff5b263 100644 --- a/paddle/fluid/framework/feed_fetch_method.h +++ b/paddle/fluid/framework/feed_fetch_method.h @@ -23,7 +23,7 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Scope; void SetFeedVariable(Scope* scope, const LoDTensor& input, diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc index 6cd16132c2a10..c883412a9a4c3 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc @@ -20,7 +20,7 @@ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Scope; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index b6c410dc957fd..6443d0594a9c5 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -21,7 +21,7 @@ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Scope; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/delete_dropout_op_pass.cc b/paddle/fluid/framework/ir/delete_dropout_op_pass.cc index 09962239a01b1..c0a4f099e39d4 100644 --- a/paddle/fluid/framework/ir/delete_dropout_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_dropout_op_pass.cc @@ -17,7 +17,7 @@ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc index b99f2266f39b2..af75646551e28 100644 --- a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc @@ -18,7 +18,7 @@ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc index 0d490d4e669fc..09fd6b8dd1116 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc +++ b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc @@ -24,7 +24,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index 69a2a6eefaf8c..4681933a66cd3 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -27,34 +27,6 @@ class DeviceContext; namespace paddle { namespace framework { -std::ostream &operator<<(std::ostream &os, const LoD &lod) { - os << "{"; - for (auto &v : lod) { - os << "{"; - bool is_first = true; - for (auto &i : v) { - if (is_first) { - os << i; - is_first = false; - } else { - os << ", " << i; - } - } - os << "}"; - } - os << "}"; - - return os; -} - -std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { - if (t.lod().size() > 0) { - os << " - lod: " << t.lod() << "\n"; - } - os << static_cast(t); - return os; -} - std::string LoDToString(const LoD &lod) { std::ostringstream stream; stream << lod; diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index 22f2027998137..bbb8f8005168c 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -28,9 +28,6 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/place.h" namespace paddle { -namespace framework { -class LoDTensor; -} // namespace framework namespace platform { class DeviceContext; } // namespace platform @@ -39,6 +36,8 @@ class DeviceContext; namespace paddle { namespace framework { +using LoDTensor = paddle::framework::Tensor; + /* * LoD is short for Level of Details. * @@ -56,9 +55,6 @@ namespace framework { */ using LoD = std::vector>; -std::ostream& operator<<(std::ostream& os, const LoD& lod); -std::ostream& operator<<(std::ostream& os, const LoDTensor& t); - std::string LoDToString(const LoD& lod); LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, @@ -102,22 +98,6 @@ bool CheckLoD(const LoD& in, int tensor_height = -1); */ bool CheckAbsLoD(const LoD& in, int tensor_height = -1); -/* - * LoDTensor (Level of details Tensor) - * see https://en.wikipedia.org/wiki/Level_of_details for reference. - */ -class LoDTensor : public Tensor { - public: - using Tensor::Tensor; - - // Split LoDTensor and copy to each place specified in places. - std::vector SplitLoDTensor( - const std::vector places) const; - - void MergeLoDTensor(const std::vector& lod_tensors, - platform::Place place); -}; - /* * Expand the `source` to fit the LoD of `lod`. For example, a `source` * LoDTensor is diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index ed475e66f626d..f706eabb47988 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -31,7 +31,7 @@ namespace framework { * Simple, intuitive and effective. Only single thread is supported, and * currently designed for inference. */ -class LoDTensor; +class Tensor; class ProgramDesc; class Scope; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 93349b8b88449..aa21c8eed256b 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -34,7 +34,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; } // namespace framework } // namespace paddle #ifdef PADDLE_WITH_XPU @@ -555,11 +555,6 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const { return it->second.empty() ? 
nullptr : it->second[0];
 }
 
-template <>
-const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
-  return Input<LoDTensor>(name);
-}
-
 template <>
 const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
     const std::string& name) const {
@@ -584,11 +579,6 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput(
   return res;
 }
 
-template <>
-Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
-  return Output<LoDTensor>(name);
-}
-
 template <>
 std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
     const std::string& name) const {
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 9d75c66beb7d4..12946b416cf9f 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -479,16 +479,10 @@ class ExecutionArgumentMappingContext : public pten::ArgumentMappingContext {
   const ExecutionContext& ctx_;
 };
 
-template <>
-const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const;
-
 template <>
 const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
     const std::string& name) const;
 
-template <>
-Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;
-
 template <>
 std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
     const std::string& name) const;
diff --git a/paddle/fluid/framework/pull_dense_worker.cc b/paddle/fluid/framework/pull_dense_worker.cc
index 77d8abcd26e9e..b13aaadc81661 100644
--- a/paddle/fluid/framework/pull_dense_worker.cc
+++ b/paddle/fluid/framework/pull_dense_worker.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Scope;
 class Variable;
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index fcdb837bc80ce..95405820a48d9 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -36,7 +36,7 @@ namespace paddle {
 
 namespace framework {
 
-class LoDTensor;
+using LoD = std::vector<std::vector<size_t>>;
 
 /* NOTE(liym27): [ What is TensorInplaceVersion used for? ]
@@ -74,6 +74,13 @@ class Tensor : public pten::DenseTensor {
   using DenseTensor = pten::DenseTensor;
   using DenseTensor::DenseTensor;
 
+  // Split Tensor and copy to each place specified in places.
+  std::vector<Tensor> SplitLoDTensor(
+      const std::vector<platform::Place> places) const;
+
+  void MergeLoDTensor(const std::vector<const Tensor*>& lod_tensors,
+                      platform::Place place);
+
   /*! The internal of two tensors share the same memory block. */
   Tensor& ShareDataWith(const Tensor& src);
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index 724e3cc1e2ee8..84334417dc7da 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -1428,7 +1428,31 @@ std::ostream& print_tensor<paddle::platform::complex<double>>(
   return os;
 }
 
+std::ostream& operator<<(std::ostream& os, const LoD& lod) {
+  os << "{";
+  for (auto& v : lod) {
+    os << "{";
+    bool is_first = true;
+    for (auto& i : v) {
+      if (is_first) {
+        os << i;
+        is_first = false;
+      } else {
+        os << ", " << i;
+      }
+    }
+    os << "}";
+  }
+  os << "}";
+
+  return os;
+}
+
 std::ostream& operator<<(std::ostream& os, const Tensor& t) {
+  if (t.lod().size() > 0) {
+    os << " - lod: " << t.lod() << "\n";
+  }
+
   os << " - place: " << t.place() << "\n";
   os << " - shape: [" << t.dims() << "]\n";
   os << " - layout: " << DataLayoutToString(t.layout()) << "\n";
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index 11858e4166595..355be39baa2a5 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -39,6 +39,9 @@ limitations under the License.
*/ namespace paddle { namespace framework { +std::ostream& operator<<(std::ostream& os, const LoD& lod); +std::ostream& operator<<(std::ostream& os, const Tensor& t); + class PrintOptions { public: static PrintOptions& Instance() { @@ -494,6 +497,5 @@ inline void TensorToVector(const Tensor& src, std::vector* dst) { delete[] array; } -std::ostream& operator<<(std::ostream& os, const Tensor& t); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/trainer.h b/paddle/fluid/framework/trainer.h index 4823c08305760..8bba9492a5686 100644 --- a/paddle/fluid/framework/trainer.h +++ b/paddle/fluid/framework/trainer.h @@ -40,7 +40,7 @@ namespace paddle { namespace framework { class Dataset; -class LoDTensor; +class Tensor; class ProgramDesc; class PullDenseWorker; class Scope; diff --git a/paddle/fluid/framework/var_type_traits.h b/paddle/fluid/framework/var_type_traits.h index f4c41197a9dfa..715e7a14c5529 100644 --- a/paddle/fluid/framework/var_type_traits.h +++ b/paddle/fluid/framework/var_type_traits.h @@ -70,11 +70,10 @@ class BKCLCommunicator; namespace framework { class LoDRankTable; class ScopeBase; -class LoDTensor; +class Tensor; class ReaderHolder; class Scope; class SelectedRows; -class Tensor; } // namespace framework namespace operators { @@ -164,8 +163,8 @@ struct VarTypeRegistryImpl { // Users should add other variable types below. // Paddle would generate unique Ids for each registered variable types. using VarTypeRegistry = detail::VarTypeRegistryImpl< - Tensor, LoDTensor, SelectedRows, std::vector, LoDRankTable, - Strings, LoDTensorArray, platform::PlaceList, ReaderHolder, String, Scope *, + Tensor, SelectedRows, std::vector, LoDRankTable, Strings, + LoDTensorArray, platform::PlaceList, ReaderHolder, String, Scope *, operators::reader::LoDTensorBlockingQueueHolder, FetchList, FeedList, operators::reader::OrderedMultiDeviceLoDTensorBlockingQueueHolder, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index be771ac48fc15..bf67cfed35f89 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -35,7 +35,7 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Scope; } // namespace framework diff --git a/paddle/fluid/inference/api/details/reset_tensor_array.h b/paddle/fluid/inference/api/details/reset_tensor_array.h index f12a54cdccedc..857160ad10282 100644 --- a/paddle/fluid/inference/api/details/reset_tensor_array.h +++ b/paddle/fluid/inference/api/details/reset_tensor_array.h @@ -23,7 +23,7 @@ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Scope; class SelectedRows; } // namespace framework diff --git a/paddle/fluid/operators/assert_op.cc b/paddle/fluid/operators/assert_op.cc index 3e4250389fcfc..466e0e793e4e3 100644 --- a/paddle/fluid/operators/assert_op.cc +++ b/paddle/fluid/operators/assert_op.cc @@ -19,7 +19,7 @@ namespace paddle { namespace framework { class InferShapeContext; -class LoDTensor; +class Tensor; class OpDesc; class Scope; class Variable; diff --git a/paddle/fluid/operators/assign_op.h b/paddle/fluid/operators/assign_op.h index bd314a00424bd..d9648c9617255 100644 --- a/paddle/fluid/operators/assign_op.h +++ b/paddle/fluid/operators/assign_op.h @@ -27,7 +27,7 @@ class DeviceContext; namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/controlflow/while_op_helper.h b/paddle/fluid/operators/controlflow/while_op_helper.h index 1685da4e95822..8ef12ca05e36a 100644 --- a/paddle/fluid/operators/controlflow/while_op_helper.h +++ b/paddle/fluid/operators/controlflow/while_op_helper.h @@ -24,7 +24,7 @@ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class ProgramDesc; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index 5271da91b8c15..c52ba68331580 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -16,7 +16,6 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; class Tensor; } // namespace framework namespace platform { diff --git a/paddle/fluid/operators/math/beam_search_npu.cc b/paddle/fluid/operators/math/beam_search_npu.cc index 6afaaea0673b2..5aede02263dd5 100644 --- a/paddle/fluid/operators/math/beam_search_npu.cc +++ b/paddle/fluid/operators/math/beam_search_npu.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; class Tensor; } // namespace framework namespace platform { diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index e29313e9f742c..491d40d3ae567 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -16,7 +16,6 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; class Tensor; } // namespace framework namespace platform { diff --git a/paddle/fluid/operators/math/sequence_scale.cc b/paddle/fluid/operators/math/sequence_scale.cc index 8e58411a1f247..f4193bb71fabb 100644 --- a/paddle/fluid/operators/math/sequence_scale.cc +++ b/paddle/fluid/operators/math/sequence_scale.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/math/sequence_scale.h b/paddle/fluid/operators/math/sequence_scale.h index d84513e024d7f..c6c84bb55dfa7 100644 --- a/paddle/fluid/operators/math/sequence_scale.h +++ b/paddle/fluid/operators/math/sequence_scale.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/memcpy_d2h_op.h b/paddle/fluid/operators/memcpy_d2h_op.h index efa8af8054fc8..94eed5cf83fee 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.h +++ b/paddle/fluid/operators/memcpy_d2h_op.h @@ -24,7 +24,7 @@ class DeviceContext; namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; class SelectedRows; } // namespace framework diff --git a/paddle/fluid/operators/memcpy_h2d_op.h b/paddle/fluid/operators/memcpy_h2d_op.h index a19dc3367a14b..cc6e771d105ae 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.h +++ b/paddle/fluid/operators/memcpy_h2d_op.h @@ -25,7 +25,7 @@ class DeviceContext; namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; class SelectedRows; } // namespace framework diff --git a/paddle/fluid/operators/memcpy_op.h b/paddle/fluid/operators/memcpy_op.h index 57dafab1d5bc7..b270d87ad00ea 100644 --- a/paddle/fluid/operators/memcpy_op.h +++ b/paddle/fluid/operators/memcpy_op.h @@ -27,7 +27,7 @@ class DeviceContext; namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; class SelectedRows; } // namespace framework diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index 5024148fe5888..dae598ef64220 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace framework { class InferShapeContext; -class LoDTensor; +class Tensor; class OpDesc; class Scope; } // namespace framework diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 30e788bb395a4..754b46c823b28 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -19,7 +19,6 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; class Tensor; } // namespace framework namespace platform { diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index c558f1852f54c..cef2993fc30d5 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -19,7 +19,7 @@ namespace paddle { namespace framework { class InferShapeContext; -class LoDTensor; +class Tensor; class OpDesc; class Scope; } // namespace framework diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index 556f1bccd1680..7adf7962e1987 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace framework { class InferShapeContext; -class LoDTensor; +class Tensor; class OpDesc; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc index d8d4e641aeb3e..4ba071032162a 100644 --- a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace framework { class LoDRankTable; -class LoDTensor; +class Tensor; class OpDesc; class Scope; } // namespace framework diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc index fe646b2830b66..0ff622d329919 100644 --- a/paddle/fluid/operators/split_lod_tensor_op.cc +++ b/paddle/fluid/operators/split_lod_tensor_op.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace framework { class InferShapeContext; -class LoDTensor; +class Tensor; class OpDesc; class Scope; } // namespace framework diff --git a/paddle/fluid/operators/tensor_formatter.h b/paddle/fluid/operators/tensor_formatter.h index 4608663b3ed9b..38e3e7a94a524 100644 --- a/paddle/fluid/operators/tensor_formatter.h +++ b/paddle/fluid/operators/tensor_formatter.h @@ -20,7 +20,7 @@ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index 1f09aec05b936..28135e37ed7bb 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -29,7 +29,7 @@ class DeviceContext; namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/platform/lodtensor_printer.cc b/paddle/fluid/platform/lodtensor_printer.cc index d607dbe5b9999..4a5dfbee15de2 100644 --- a/paddle/fluid/platform/lodtensor_printer.cc +++ b/paddle/fluid/platform/lodtensor_printer.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class LoDTensor; +class Tensor; class Variable; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index b5845a1ef9628..5f4e9a8861390 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -875,12 +875,12 @@ PYBIND11_MODULE(core_noavx, m) { .def("set", SetTensorFromPyArray, py::arg("array"), py::arg("place"), py::arg("zero_copy") = false, R"DOC( - Set the data of LoDTensor on place with given numpy array. + Set the data of Tensor on place with given numpy array. Args: lod (numpy.ndarray): The data to set. place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace|NPUPlace|MLUPlace): The place where the - LoDTensor is to be set. + Tensor is to be set. zero_copy (bool, optional): Whether to share memory with the input numpy array. This parameter only works with CPUPlace. Default: False. @@ -893,17 +893,17 @@ PYBIND11_MODULE(core_noavx, m) { import paddle.fluid as fluid import numpy as np - t = fluid.LoDTensor() + t = fluid.Tensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) )DOC") .def("shape", [](framework::Tensor &self) { return vectorize(self.dims()); }, R"DOC( - Return the shape of LoDTensor. + Return the shape of Tensor. Returns: - list[int]: The shape of LoDTensor. 
+ list[int]: The shape of Tensor. Examples: @@ -912,7 +912,7 @@ PYBIND11_MODULE(core_noavx, m) { import paddle.fluid as fluid import numpy as np - t = fluid.LoDTensor() + t = fluid.Tensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) print(t.shape()) # [5, 30] )DOC") @@ -949,117 +949,34 @@ PYBIND11_MODULE(core_noavx, m) { }) .def("_share_data_with", &framework::Tensor::ShareDataWith) .def("__getitem__", PySliceTensor, py::return_value_policy::reference) - .def("__str__", [](const framework::Tensor &self) { - std::stringstream ostr; - ostr << self; - return ostr.str(); - }); - - // TODO(cql): add reference: en_user_guide_lod_tensor - py::class_(m, "LoDTensor", R"DOC( - LoDTensor is a Tensor with optional LoD (Level of Details) information, - it can be used for variable-length sequences, - see :ref:`user_guide_lod_tensor` for details. - - LoDTensor can be converted to numpy array using :code:`numpy.array(lod_tensor)`. - - You can skip the following explanation if you don't need to know details - of LoDTensor. - - The following two examples show how to use LODtensor to represent - variable-length sequences. - - Example 1: - - Suppose x is a LoDTensor representing a variable-length sequence. - It contains two logical subsequences, the length of first logical sequence - is 2 (e.g., number of samples is 2), the length of second logical sequence - is 3, and the total length is 5. The data of the first logical sequence is - [1, 2], [3, 4], and the data of the second logical sequence is [5, 6], - [7, 8], [9, 10]. The data dimension of each sample is 2. So, the final - shape of the LoDTensor is [5, 2], of which 5 is the total length and 2 is - the dimension of each sample. - - Logically, we can represent the variable-length sequence in two ways: one - is in the form of recursive sequence lengths, that is, - x.recursive_sequence_lengths=[[2, 3]]; the other is in the form of offsets, - that is, x.lod=[[0, 2, 2+3]]. These two representations are equivalent, and - you can set and retrieve recursive_sequence_lengths or LoD through the - corresponding interfaces of LoDTensor introduced later. - - Actually, in order to access sequence faster, Paddle uses offset to store - different lengths of sequences. - Therefore, the operations on recursive_sequence_lengths will be converted - to the operations on LoD eventually. - - .. code-block:: python - - y.data = [[1, 2], [3, 4], - [5, 6], [7, 8], - [9, 10], [11, 12], [13, 14]] - - y.shape = [2+2+3, 2] - - y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]] - - y.lod = [[0, 2, 3], [0, 2, 4, 7]] - - Example 2: - - LoD may have more than one level (for example, a paragraph may have more - than one sentence and a sentence may have more than one word). Suppose y - is a LoDTensor and its lod_level is 2. - From level = 0, there are two logical sequences, the length of which is - 2 and 1, respectively, indicating that the first logical sequence contains - two sub-sequences and the second logical sequence contains one sub-sequence. - From level = 1, the lengths of two sub-sequences contained by the first - logical sequence is 2 and 2, and the length of sub-sequence contained by - the second logical sequence is 3. - - Therefore, the LoDTensor is represented in the form of recursive sequence - lengths as y.recursive_sequence_lengths=[[2,1], [2,2,3]]; and equally, in - the form of offset, it is represented as y.lod=[[0,2,3], [0,2,4,7]]. - - .. 
code-block:: python - - y.data = [[1, 2], [3, 4], - [5, 6], [7, 8], - [9, 10], [11, 12], [13, 14]] - - y.shape = [2+2+3, 2] - - y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]] - - y.lod = [[0, 2, 3], [0, 2, 4, 7]] - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - t = fluid.LoDTensor() - - )DOC") - .def("__array__", - [](framework::Tensor &self) { return TensorToPyArray(self); }) + .def("__str__", + [](const framework::Tensor &self) { + std::stringstream ostr; + ostr << self; + return ostr.str(); + }) /* ------ End of original Tensor ------ */ + .def( + "__init__", + [](framework::Tensor &instance, const std::vector> + &recursive_sequence_lengths) { + LoD new_lod; + new_lod.reserve(recursive_sequence_lengths.size()); + std::copy(recursive_sequence_lengths.begin(), + recursive_sequence_lengths.end(), + std::back_inserter(new_lod)); + LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); + PADDLE_ENFORCE_EQ( + CheckLoD(new_offset_lod, -1), true, + platform::errors::InvalidArgument( + "The provided recursive_sequence_lengths info is invalid, " + "the LoD converted by recursive_sequence_lengths is %s", + new_lod)); + new (&instance) framework::Tensor(new_offset_lod); + }) .def("__init__", - [](LoDTensor &instance, const std::vector> - &recursive_sequence_lengths) { - LoD new_lod; - new_lod.reserve(recursive_sequence_lengths.size()); - std::copy(recursive_sequence_lengths.begin(), - recursive_sequence_lengths.end(), - std::back_inserter(new_lod)); - LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); - PADDLE_ENFORCE_EQ( - CheckLoD(new_offset_lod, -1), true, - platform::errors::InvalidArgument( - "The provided recursive_sequence_lengths info is invalid, " - "the LoD converted by recursive_sequence_lengths is %s", - new_lod)); - new (&instance) LoDTensor(new_offset_lod); + [](framework::Tensor &instance) { + new (&instance) framework::Tensor(); }) - .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) // We implement offset based LOD in C++ while we use length based with // Python API. So we changed set_lod to set_recursive_sequence_lengths // to @@ -1067,7 +984,8 @@ PYBIND11_MODULE(core_noavx, m) { // The discussion is here: // https://github.com/PaddlePaddle/Paddle/issues/10855 .def("set_lod", - [](LoDTensor &self, const std::vector> &lod) { + [](framework::Tensor &self, + const std::vector> &lod) { // the input lod is offset-based level-of-detail info LoD new_lod; new_lod.reserve(lod.size()); @@ -1079,7 +997,7 @@ PYBIND11_MODULE(core_noavx, m) { self.set_lod(new_lod); }, py::arg("lod"), R"DOC( - Set LoD of the LoDTensor. + Set LoD of the Tensor. Args: lod (list[list[int]]): The lod to set. @@ -1093,14 +1011,14 @@ PYBIND11_MODULE(core_noavx, m) { import paddle.fluid as fluid import numpy as np - t = fluid.LoDTensor() + t = fluid.Tensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set_lod([[0, 2, 5]]) print(t.lod()) # [[0, 2, 5]] )DOC") .def("set_recursive_sequence_lengths", - [](LoDTensor &self, const std::vector> - &recursive_sequence_lengths) { + [](framework::Tensor &self, const std::vector> + &recursive_sequence_lengths) { // the input recursive_sequence_lengths is length-based // level-of-detail info LoD new_lod; @@ -1119,7 +1037,7 @@ PYBIND11_MODULE(core_noavx, m) { self.set_lod(new_offset_lod); }, py::arg("recursive_sequence_lengths"), R"DOC( - Set LoD of the LoDTensor according to recursive sequence lengths. + Set LoD of the Tensor according to recursive sequence lengths. 
For example, if recursive_sequence_lengths=[[2, 3]], which means there are two sequences with length 2 and 3 respectively, the @@ -1137,14 +1055,14 @@ PYBIND11_MODULE(core_noavx, m) { import paddle.fluid as fluid import numpy as np - t = fluid.LoDTensor() + t = fluid.Tensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set_recursive_sequence_lengths([[2, 3]]) - print(t.recursive_sequence_length()) # [[2, 3]] + print(t.recursive_sequence_lengths()) # [[2, 3]] print(t.lod()) # [[0, 2, 5]] )DOC") .def("lod", - [](LoDTensor &self) -> std::vector<std::vector<size_t>> { + [](framework::Tensor &self) -> std::vector<std::vector<size_t>> { // output the offset-based lod info LoD lod = self.lod(); std::vector<std::vector<size_t>> new_lod; @@ -1153,10 +1071,10 @@ PYBIND11_MODULE(core_noavx, m) { return new_lod; }, R"DOC( - Return the LoD of the LoDTensor. + Return the LoD of the Tensor. Returns: - list[list[int]]: The lod of the LoDTensor. + list[list[int]]: The lod of the Tensor. Examples: .. code-block:: python @@ -1164,14 +1082,14 @@ PYBIND11_MODULE(core_noavx, m) { import paddle.fluid as fluid import numpy as np - t = fluid.LoDTensor() + t = fluid.Tensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set_lod([[0, 2, 5]]) print(t.lod()) # [[0, 2, 5]] )DOC") // Set above comments of set_lod. .def("recursive_sequence_lengths", - [](LoDTensor &self) -> std::vector<std::vector<size_t>> { + [](framework::Tensor &self) -> std::vector<std::vector<size_t>> { // output the length-based lod info LoD lod = ConvertToLengthBasedLoD(self.lod()); std::vector<std::vector<size_t>> new_lod; @@ -1181,7 +1099,7 @@ PYBIND11_MODULE(core_noavx, m) { }, R"DOC( Return the recursive sequence lengths corresponding to the LoD - of the LoDTensor. + of the Tensor. Returns: list[list[int]]: The recursive sequence lengths. @@ -1192,19 +1110,19 @@ PYBIND11_MODULE(core_noavx, m) { import paddle.fluid as fluid import numpy as np - t = fluid.LoDTensor() + t = fluid.Tensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set_recursive_sequence_lengths([[2, 3]]) print(t.recursive_sequence_lengths()) # [[2, 3]] )DOC") .def("has_valid_recursive_sequence_lengths", - [](LoDTensor &self) -> bool { + [](framework::Tensor &self) -> bool { // Check that the lod info is valid and matches the outermost - // dimension of the LoDTensor data + // dimension of the Tensor data return CheckLoD(self.lod(), vectorize(self.dims()).front()); }, R"DOC( - Check whether the LoD of the LoDTensor is valid. + Check whether the LoD of the Tensor is valid. Returns: bool: Whether the LoD is valid. @@ -1215,91 +1133,80 @@ PYBIND11_MODULE(core_noavx, m) { import paddle.fluid as fluid import numpy as np - t = fluid.LoDTensor() + t = fluid.Tensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set_recursive_sequence_lengths([[2, 3]]) print(t.has_valid_recursive_sequence_lengths()) # True )DOC") - .def("__getitem__", PySliceTensor, py::return_value_policy::reference, - R"DOC( - Slice the original Tensor, and remove the LoD information. - - Returns: - out (Tensor): new Tensor(NOT LoDTensor). 
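// NOTE(reviewer): a minimal sketch of the length-based -> offset-based LoD
// conversion that ConvertToOffsetBasedLoD performs for the bindings above,
// e.g. recursive_sequence_lengths [[2, 3]] -> lod [[0, 2, 5]]. The helper
// name is illustrative, not a Paddle API:
//
//   std::vector<std::vector<size_t>> ToOffsetLoD(
//       const std::vector<std::vector<size_t>>& lengths) {
//     std::vector<std::vector<size_t>> offsets;
//     for (const auto& level : lengths) {
//       std::vector<size_t> row = {0};
//       for (size_t len : level) row.push_back(row.back() + len);
//       offsets.push_back(std::move(row));
//     }
//     return offsets;  // ToOffsetLoD({{2, 3}}) == {{0, 2, 5}}
//   }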
- )DOC") - .def("__str__", - [](const LoDTensor &self) { - std::stringstream ostr; - ostr << self; - return ostr.str(); - }) .def("_as_type", - [](const LoDTensor &self, + [](const framework::Tensor &self, paddle::framework::proto::VarType::Type type) { - LoDTensor dst; + framework::Tensor dst; if (self.IsInitialized() && self.numel() > 0) { TransDataType(self, type, &dst); } return dst; }) - .def("_copy", [](const LoDTensor &self, const platform::Place &place) { - // follow fetch_op's inplementation - LoDTensor dst; - if (self.IsInitialized() && self.numel() > 0) { - TensorCopySync(self, place, &dst); - } else { - // Not copy, if the src tensor is empty. - dst.clear(); - dst.Resize({0}); - } - dst.set_lod(self.lod()); - return dst; + .def("_copy", + [](const framework::Tensor &self, const platform::Place &place) { + // follow fetch_op's inplementation + framework::Tensor dst; + if (self.IsInitialized() && self.numel() > 0) { + TensorCopySync(self, place, &dst); + } else { + // Not copy, if the src tensor is empty. + dst.clear(); + dst.Resize({0}); + } + dst.set_lod(self.lod()); + return dst; #ifdef _WIN32 - }); + }); #else }) .def(py::pickle( - [](const LoDTensor &t) { // __getstate__ + [](const framework::Tensor &t) { // __getstate__ auto holder = t.Holder(); - PADDLE_ENFORCE_EQ( - platform::is_cpu_place(holder->place()), true, - platform::errors::PreconditionNotMet( - "LoDTensor is not on CPU." - "Now only LoDTensor on CPU can be serialized.")); - auto* mmap_writer_allocation = - dynamic_cast( - holder.get()); - PADDLE_ENFORCE_NOT_NULL(mmap_writer_allocation, - platform::errors::PreconditionNotMet( - "LoDTensor is not in shared memory." - "Now only LoDTensor on shared memory can be serialized.")); + PADDLE_ENFORCE_EQ(platform::is_cpu_place(holder->place()), true, + platform::errors::PreconditionNotMet( + "Tensor is not on CPU." + "Now only Tensor on CPU can be serialized.")); + auto *mmap_writer_allocation = + dynamic_cast( + holder.get()); + PADDLE_ENFORCE_NOT_NULL( + mmap_writer_allocation, + platform::errors::PreconditionNotMet( + "Tensor is not in shared memory." + "Now only Tensor on shared memory can be serialized.")); int type_idx = static_cast(t.type()); return py::make_tuple(mmap_writer_allocation->ipc_name(), - mmap_writer_allocation->size(), - type_idx, vectorize(t.dims()), t.lod()); + mmap_writer_allocation->size(), type_idx, + vectorize(t.dims()), t.lod()); }, [](py::tuple t) { // __setstate__ if (t.size() != 5) - throw std::runtime_error("Invalid LoDTensor state!"); + throw std::runtime_error("Invalid Tensor state!"); // 1. Create a new C++ instance - LoDTensor tensor; + framework::Tensor tensor; // 2. Rebuild Allocation const std::string &ipc_name = t[0].cast(); size_t size = t[1].cast(); auto shared_reader_holder = - memory::allocation::RebuildMemoryMapReaderAllocation( - ipc_name, size); + memory::allocation::RebuildMemoryMapReaderAllocation(ipc_name, + size); // 3. Maintain global fd set - VLOG(3) << "LoDTensor ipc name: " << ipc_name; + VLOG(3) << "Tensor ipc name: " << ipc_name; memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name); - // 4. Rebuild LoDTensor - tensor.ResetHolderWithType(shared_reader_holder, - static_cast(t[2].cast())); + // 4. 
Rebuild Tensor + tensor.ResetHolderWithType( + shared_reader_holder, + static_cast(t[2].cast())); tensor.Resize(make_ddim(t[3].cast>())); tensor.set_lod(t[4].cast()); diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 53d641896e43f..edd5cde938630 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -31,7 +31,7 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) { } } -std::unique_ptr MakePtenDenseTensor( +std::unique_ptr MakePtenDenseTensorBase( const paddle::framework::Tensor& src) { VLOG(3) << "MakePtenDenseTensor based Tensor."; pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), @@ -44,15 +44,15 @@ std::unique_ptr MakePtenDenseTensor( } std::unique_ptr MakePtenDenseTensor( - const paddle::framework::LoDTensor& src) { - auto out = - MakePtenDenseTensor(static_cast(src)); + const paddle::framework::Tensor& src) { + auto out = MakePtenDenseTensorBase( + static_cast(src)); SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod), src.lod()); return std::move(out); } -std::unique_ptr MakePtenDenseTensor( +std::unique_ptr MakePtenDenseTensorBase( const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) { pten::DenseTensorMeta meta{ arg_def.dtype, src.dims(), src.layout(), src.offset()}; @@ -71,16 +71,15 @@ std::unique_ptr MakePtenDenseTensor( } std::unique_ptr MakePtenDenseTensor( - const paddle::framework::LoDTensor& src, - const pten::TensorArgDef& arg_def) { - auto out = MakePtenDenseTensor( + const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) { + auto out = MakePtenDenseTensorBase( static_cast(src), arg_def); SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod), src.lod()); return std::move(out); } -pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src) { +pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src) { PADDLE_ENFORCE_EQ(src.numel(), 1, paddle::platform::errors::InvalidArgument( @@ -138,7 +137,7 @@ pten::Scalar MakePtenScalarFromVar(const framework::Variable& variable) { } } -pten::ScalarArray MakePtenScalarArray(const paddle::framework::LoDTensor& src) { +pten::ScalarArray MakePtenScalarArray(const paddle::framework::Tensor& src) { if (src.type() == paddle::framework::proto::VarType::INT64) { return {src.data(), src.numel()}; } else if (src.type() == paddle::framework::proto::VarType::INT32) { @@ -295,7 +294,7 @@ std::unique_ptr MakePtenTensorBaseFromVar( return {}; } -void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { +void MovesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) { PADDLE_ENFORCE_NOT_NULL( src, platform::errors::InvalidArgument( @@ -311,12 +310,12 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { dst->set_offset(src->meta().offset); } -void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { - MovesStorage(src, static_cast(dst)); +void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { + MovesStorageBase(src, static_cast(dst)); SetLoD(dst->mutable_lod(), src->lod()); } -void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { +void SharesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) { PADDLE_ENFORCE_NOT_NULL( src, platform::errors::InvalidArgument( @@ -333,13 +332,13 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { dst->set_offset(src->meta().offset); } -void 
SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { - SharesStorage(src, static_cast(dst)); +void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { + SharesStorageBase(src, static_cast(dst)); SetLoD(dst->mutable_lod(), src->lod()); } -void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, - pten::DenseTensor* dst) { +void ReMakePtenDenseTensorBase(const paddle::framework::Tensor& src, + pten::DenseTensor* dst) { VLOG(3) << "ReMakePtenDenseTensor based Tensor."; auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); @@ -361,17 +360,17 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, shared_storage->ResetAllocation(src.Holder()); } -void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, +void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, pten::DenseTensor* dst) { auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); SetLoD(&meta->lod, src.lod()); - ReMakePtenDenseTensor(static_cast(src), - dst); + ReMakePtenDenseTensorBase(static_cast(src), + dst); } -void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst) { +void ReMakePtenDenseTensorByArgDefBase(const paddle::framework::Tensor& src, + const pten::TensorArgDef& arg_def, + pten::DenseTensor* dst) { VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef."; auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); @@ -395,12 +394,12 @@ void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, } } -void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src, +void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def, pten::DenseTensor* dst) { auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); SetLoD(&meta->lod, src.lod()); - ReMakePtenDenseTensorByArgDef( + ReMakePtenDenseTensorByArgDefBase( static_cast(src), arg_def, dst); } diff --git a/paddle/pten/api/lib/utils/tensor_utils.h b/paddle/pten/api/lib/utils/tensor_utils.h index 06edb4a7516b0..0ac4ac7a33179 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.h +++ b/paddle/pten/api/lib/utils/tensor_utils.h @@ -33,12 +33,9 @@ namespace experimental { std::unique_ptr MakePtenDenseTensor( const paddle::framework::Tensor& src); -std::unique_ptr MakePtenDenseTensor( - const paddle::framework::LoDTensor& src); - -pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src); +pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src); -pten::ScalarArray MakePtenScalarArray(const paddle::framework::LoDTensor& src); +pten::ScalarArray MakePtenScalarArray(const paddle::framework::Tensor& src); pten::Scalar MakePtenScalarFromVar(const framework::Variable& variable); @@ -56,12 +53,8 @@ std::unique_ptr MakePtenTensorBaseFromVar( void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); -void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); - void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); -void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); - /** * In order to improve the compatibility state performance, some tricky tool * functions are added. 
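// NOTE(reviewer): every overload pair in tensor_utils is collapsed the same
// way -- the old Tensor-only variant becomes a *Base helper, and the single
// public entry point (now that LoDTensor is folded into Tensor) forwards to
// it and handles the LoD once. A minimal sketch of that shape, with a
// hypothetical Foo:
//
//   void FooBase(const paddle::framework::Tensor& src,
//                pten::DenseTensor* dst);  // meta + storage only
//   void Foo(const paddle::framework::Tensor& src, pten::DenseTensor* dst) {
//     FooBase(src, dst);
//     SetLoD(&pten::CompatibleDenseTensorUtils::GetMutableMeta(dst)->lod,
//            src.lod());
//   }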
@@ -74,17 +67,10 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, pten::DenseTensor* dst); -void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, - pten::DenseTensor* dst); - void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def, pten::DenseTensor* dst); -void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst); - void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, const pten::TensorArgDef& arg_def, pten::DenseTensor* dst); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index ec589b40e907f..0339abe0960c2 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -32,6 +32,10 @@ except Exception as e: raise e +# Patch LoDTensor +from . import core +core.LoDTensor = core.Tensor + # import all class inside framework into fluid module from . import framework from .framework import * @@ -69,6 +73,7 @@ from . import distribute_lookup_table from .param_attr import ParamAttr, WeightNormParamAttr from .data_feeder import DataFeeder + from .core import LoDTensor, LoDTensorArray, Scope, _Scope from .core import CPUPlace, XPUPlace, CUDAPlace, CUDAPinnedPlace, NPUPlace, IPUPlace, MLUPlace from .incubate import fleet From 1053b1d5ed04f411db50e66848210d9f1996bde4 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Sat, 15 Jan 2022 14:52:58 +0800 Subject: [PATCH 07/10] replace last contextT (#38971) --- paddle/pten/kernels/gpu/scale_kernel.cu | 4 ++-- paddle/pten/kernels/math_kernel.h | 24 ++++++++++++------------ paddle/pten/kernels/scale_kernel.h | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/paddle/pten/kernels/gpu/scale_kernel.cu b/paddle/pten/kernels/gpu/scale_kernel.cu index 4d63701413cd6..14ee75e4f9130 100644 --- a/paddle/pten/kernels/gpu/scale_kernel.cu +++ b/paddle/pten/kernels/gpu/scale_kernel.cu @@ -43,8 +43,8 @@ struct ScaleFunctor { } }; -template -void ScaleKernel(const ContextT& dev_ctx, +template +void ScaleKernel(const Context& dev_ctx, const DenseTensor& x, const Scalar& scale, float bias, diff --git a/paddle/pten/kernels/math_kernel.h b/paddle/pten/kernels/math_kernel.h index e01103fc5b847..65c0f84e696de 100644 --- a/paddle/pten/kernels/math_kernel.h +++ b/paddle/pten/kernels/math_kernel.h @@ -67,8 +67,8 @@ void SumKernel(const Context& dev_ctx, DataType out_dtype, DenseTensor* out); -template -DenseTensor Add(const ContextT& dev_ctx, +template +DenseTensor Add(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, int axis) { @@ -77,12 +77,12 @@ DenseTensor Add(const ContextT& dev_ctx, pten::make_intrusive( dev_ctx.GetPlace()), std::move(out_meta)); - AddKernel(dev_ctx, x, y, axis, &dense_out); + AddKernel(dev_ctx, x, y, axis, &dense_out); return dense_out; } -template -DenseTensor Subtract(const ContextT& dev_ctx, +template +DenseTensor Subtract(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, int axis) { @@ -91,12 +91,12 @@ DenseTensor Subtract(const ContextT& dev_ctx, pten::make_intrusive( dev_ctx.GetPlace()), std::move(out_meta)); - SubtractKernel(dev_ctx, x, y, axis, &dense_out); + SubtractKernel(dev_ctx, x, y, axis, &dense_out); return dense_out; } -template -DenseTensor Divide(const ContextT& dev_ctx, +template +DenseTensor Divide(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, int 
axis) { @@ -105,12 +105,12 @@ DenseTensor Divide(const ContextT& dev_ctx, pten::make_intrusive( dev_ctx.GetPlace()), std::move(out_meta)); - DivideKernel(dev_ctx, x, y, axis, &dense_out); + DivideKernel(dev_ctx, x, y, axis, &dense_out); return dense_out; } -template -DenseTensor Multiply(const ContextT& dev_ctx, +template +DenseTensor Multiply(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, int axis) { @@ -119,7 +119,7 @@ DenseTensor Multiply(const ContextT& dev_ctx, pten::make_intrusive( dev_ctx.GetPlace()), std::move(out_meta)); - MultiplyKernel(dev_ctx, x, y, axis, &dense_out); + MultiplyKernel(dev_ctx, x, y, axis, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/scale_kernel.h b/paddle/pten/kernels/scale_kernel.h index ba16db566b8bb..1cd11f0b8788f 100644 --- a/paddle/pten/kernels/scale_kernel.h +++ b/paddle/pten/kernels/scale_kernel.h @@ -28,15 +28,15 @@ void ScaleKernel(const Context& dev_ctx, bool bias_after_scale, DenseTensor* out); -template -DenseTensor Scale(const ContextT& dev_ctx, +template +DenseTensor Scale(const Context& dev_ctx, const DenseTensor& x, const Scalar& scale, float bias, bool bias_after_scale) { auto out_meta = UnchangedInferMeta(x.meta()); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); - ScaleKernel( + auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + ScaleKernel( dev_ctx, x, scale, bias, bias_after_scale, &dense_out); return dense_out; } From 35d2b71ab531b7b34c42576da49651ba7282300f Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Sat, 15 Jan 2022 15:44:48 +0800 Subject: [PATCH 08/10] [PTen] Remove cached kernel context (#38953) * remove cached kernel context * revert dataloader format change --- .../framework/new_executor/interpretercore.cc | 9 +- .../new_executor/interpretercore_util.cc | 11 +- .../new_executor/new_executor_defs.cc | 4 - .../new_executor/new_executor_defs.h | 5 +- paddle/fluid/framework/operator.cc | 122 +++++------------- paddle/fluid/framework/operator.h | 13 +- paddle/fluid/imperative/layer.cc | 15 +-- paddle/fluid/imperative/op_base.h | 5 - paddle/fluid/imperative/prepared_operator.cc | 100 ++++---------- paddle/fluid/imperative/prepared_operator.h | 13 +- paddle/fluid/imperative/tracer.cc | 2 - .../fluid/dataloader/dataloader_iter.py | 19 --- 12 files changed, 82 insertions(+), 236 deletions(-) diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 950756c0394a5..aea9ad2035396 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -418,15 +418,16 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) { VLOG(4) << "Run pten kernel: " << op->Type(); VLOG(4) << instr_node.InnerRuntimeContext().get() << " " << &instr_node.DeviceContext(); + pten::KernelContext pt_kernel_context; op_with_kernel->BuildPtenKernelContext( *instr_node.InnerRuntimeContext().get(), - const_cast(&instr_node.DeviceContext())); + const_cast(&instr_node.DeviceContext()), + &pt_kernel_context); - (*instr_node.PtenKernel())(instr_node.PtenKernelContext()); + (*instr_node.PtenKernel())(&pt_kernel_context); op_with_kernel->WriteBackToOutputs( - instr_node.InnerRuntimeContext().get()); - instr_node.PtenKernelContext()->ClearData(); + instr_node.InnerRuntimeContext().get(), &pt_kernel_context); } else { instr_node.KernelFunc()(*instr_node.InnerExecutionContext().get()); } diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.cc 
b/paddle/fluid/framework/new_executor/interpretercore_util.cc index 7ced4853c2d8f..214a1d728266b 100644 --- a/paddle/fluid/framework/new_executor/interpretercore_util.cc +++ b/paddle/fluid/framework/new_executor/interpretercore_util.cc @@ -425,13 +425,14 @@ void build_op_func_list(const platform::Place& place, } if (run_pten_kernel) { - op_with_kernel->BuildPtenKernelContext(runtime_context, dev_ctx); + pten::KernelContext pt_kernel_context; + op_with_kernel->BuildPtenKernelContext(runtime_context, dev_ctx, + &pt_kernel_context); op_func_node.pt_kernel_ = op_with_kernel->PtenKernel(); - op_func_node.pt_kernel_context_ = op_with_kernel->PtenKernelContext(); - (*op_func_node.pt_kernel_)(op_func_node.pt_kernel_context_); - op_with_kernel->WriteBackToOutputs(&runtime_context); - op_func_node.pt_kernel_context_->ClearData(); + (*op_func_node.pt_kernel_)(&pt_kernel_context); + op_with_kernel->WriteBackToOutputs(&runtime_context, + &pt_kernel_context); } else { op_func_node.kernel_func_ = OpKernelComputeFunc(kernel_iter->second); op_func_node.kernel_func_(exec_ctx); diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc index 654746794da4e..fb29e18887b4e 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.cc +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -688,10 +688,6 @@ pten::Kernel* Instruction::PtenKernel() const { return op_func_node_.pt_kernel_; } -pten::KernelContext* Instruction::PtenKernelContext() const { - return op_func_node_.pt_kernel_context_; -} - OpFuncType Instruction::KernelType() const { return op_func_node_.type_; } OperatorBase* Instruction::OpBase() const { diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h index 5d63eb33d424b..0ef85a25a237b 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.h +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -299,8 +299,7 @@ struct OpFuncNode { platform::DeviceContext* dev_ctx_; // not owned // fit for pten kernel - pten::Kernel* pt_kernel_{nullptr}; // not owned - pten::KernelContext* pt_kernel_context_{nullptr}; // not onwed + pten::Kernel* pt_kernel_{nullptr}; // not owned OpFuncType type_; }; @@ -322,8 +321,6 @@ class Instruction { pten::Kernel* PtenKernel() const; - pten::KernelContext* PtenKernelContext() const; - OpFuncType KernelType() const; OperatorBase* OpBase() const; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index aa21c8eed256b..ff12edb72c06a 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1192,13 +1192,10 @@ void OperatorWithKernel::RunImpl(const Scope& scope, platform::RecordEvent record_event("compute", platform::EventRole::kInnerOp); if (run_pten_kernel_) { - if (pt_kernel_context_ == nullptr) { - pt_kernel_context_.reset(new pten::KernelContext()); - } - BuildPtenKernelContext(*runtime_ctx, dev_ctx); - (*pt_kernel_)(pt_kernel_context_.get()); - WriteBackToOutputs(runtime_ctx); - pt_kernel_context_->ClearData(); + pten::KernelContext pt_kernel_context; + BuildPtenKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context); + (*pt_kernel_)(&pt_kernel_context); + WriteBackToOutputs(runtime_ctx, &pt_kernel_context); } else { (*kernel_func_)( ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx)); @@ -1791,18 +1788,9 @@ KernelSignature OperatorWithKernel::GetExpectedPtenKernelArgs( } void 
OperatorWithKernel::BuildPtenKernelContext( - const RuntimeContext& ctx, platform::DeviceContext* dev_ctx) const { - if (pt_kernel_context_ == nullptr) { - pt_kernel_context_.reset(new pten::KernelContext()); - } - // TODO(chenweihang): now only work for very simple case, - // many cases need to be deal with later: - // 1. the input and output are not tensor - // 2. the dispensbale, duplicable input and output - // 3. needless attributes remove - // 4. use pt Tensor directly - // 5. kernel input is not DenseTensor - pt_kernel_context_->SetDeviceContext(dev_ctx); + const RuntimeContext& ctx, platform::DeviceContext* dev_ctx, + pten::KernelContext* pt_kernel_context) const { + pt_kernel_context->SetDeviceContext(dev_ctx); auto& input_names = std::get<0>(pt_kernel_signature_->args); auto& attr_names = std::get<1>(pt_kernel_signature_->args); @@ -1836,33 +1824,14 @@ void OperatorWithKernel::BuildPtenKernelContext( // calcute the start and end index of the input tensors size_t start_idx = - (i == 0 ? 0 : pt_kernel_context_->InputRangeAt(i - 1).second); + (i == 0 ? 0 : pt_kernel_context->InputRangeAt(i - 1).second); size_t end_idx = start_idx + ins_vector.size(); - auto current_vector_size = pt_kernel_context_->InputsSize(); - // If the memory needed is less than the current memory allocated, we will - // reuse the current memory by using ReMakePtenDenseTensorFromVar. - // Otherwise,we will create new storage. for (size_t offset = 0; offset < ins_vector.size(); ++offset) { - if (current_vector_size > start_idx + offset) { - auto& input_ptr = - pt_kernel_context_->MutableInputPtrAt(start_idx + offset); - if (input_ptr == nullptr) { - input_ptr = experimental::MakePtenTensorBaseFromVar( - *ins_vector[offset], in_def); - } else { - experimental::ReMakePtenDenseTensorFromVar( - *ins_vector[offset], in_def, - pt_kernel_context_->MutableInputAt(start_idx + - offset)); - } - } else { - pt_kernel_context_->EmplaceBackInputWithoutSetRange( - experimental::MakePtenTensorBaseFromVar(*ins_vector[offset], - in_def)); - } + pt_kernel_context->EmplaceBackInputWithoutSetRange( + experimental::MakePtenTensorBaseFromVar(*ins_vector[offset], in_def)); } - pt_kernel_context_->AssignInputRange(std::make_pair(start_idx, end_idx), i); + pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i); } for (size_t i = 0; i < output_names.size(); ++i) { @@ -1870,43 +1839,24 @@ void OperatorWithKernel::BuildPtenKernelContext( auto& outs_vector = ctx.outputs.at(output_names[i]); size_t start_idx = - (i == 0 ? 0 : pt_kernel_context_->OutputRangeAt(i - 1).second); + (i == 0 ? 0 : pt_kernel_context->OutputRangeAt(i - 1).second); size_t end_idx = start_idx + outs_vector.size(); - auto current_vector_size = pt_kernel_context_->OutputsSize(); - // If the memory needed is less than the current memory allocated, we will - // reuse the current memory by using ReMakePtenDenseTensorFromVar. - // Otherwise,we will create new storage. 
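// NOTE(reviewer): with a KernelContext built per call, the reuse-or-create
// branching deleted here becomes unnecessary; RunImpl now fills a fresh,
// stack-local context on every run, as the operator.cc hunk earlier in this
// commit shows:
//
//   pten::KernelContext pt_kernel_context;  // lives for one Run only
//   BuildPtenKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context);
//   (*pt_kernel_)(&pt_kernel_context);
//   WriteBackToOutputs(runtime_ctx, &pt_kernel_context);
//   // destroyed on scope exit, so no ClearData() call is required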
for (size_t offset = 0; offset < outs_vector.size(); ++offset) { - if (current_vector_size > start_idx + offset) { - auto* buffer_tensor = - pt_kernel_context_->MutableOutputAt(start_idx + - offset); - if (buffer_tensor) { - experimental::ReMakePtenDenseTensorFromVar(outs_vector[offset], - out_def, buffer_tensor); - } - } else { - pt_kernel_context_->EmplaceBackOutputWithoutSetRange( - experimental::MakePtenTensorBaseFromVar(outs_vector[offset], - out_def)); - } + pt_kernel_context->EmplaceBackOutputWithoutSetRange( + experimental::MakePtenTensorBaseFromVar(outs_vector[offset], + out_def)); } // Deal with the case that some outputs are NULL when run the kernel. // For example : the outputs of matmul_grad are dx and dy, // sometimes dx or dy may be NULL. if (outs_vector.empty()) { - if (current_vector_size > start_idx) { - pt_kernel_context_->SetOutputWithoutSetRange(start_idx, {nullptr}); - } else { - pt_kernel_context_->EmplaceBackOutputWithoutSetRange({nullptr}); - } + pt_kernel_context->EmplaceBackOutputWithoutSetRange({nullptr}); end_idx = start_idx + 1; } - pt_kernel_context_->AssignOutputRange(std::make_pair(start_idx, end_idx), - i); + pt_kernel_context->AssignOutputRange(std::make_pair(start_idx, end_idx), i); } for (size_t i = 0; i < attr_names.size(); ++i) { @@ -1915,11 +1865,11 @@ void OperatorWithKernel::BuildPtenKernelContext( if (attr_iter != Attrs().end()) { // shape is in the attribute if (std::type_index(attr_iter->second.type()) == std::type_index(typeid(std::vector))) { - pt_kernel_context_->EmplaceBackAttr(std::move(pten::ScalarArray( + pt_kernel_context->EmplaceBackAttr(std::move(pten::ScalarArray( BOOST_GET_CONST(std::vector, attr_iter->second)))); } else if (std::type_index(attr_iter->second.type()) == std::type_index(typeid(std::vector))) { - pt_kernel_context_->EmplaceBackAttr(std::move(pten::ScalarArray( + pt_kernel_context->EmplaceBackAttr(std::move(pten::ScalarArray( BOOST_GET_CONST(std::vector, attr_iter->second)))); } else { PADDLE_THROW(platform::errors::Unimplemented( @@ -1930,10 +1880,10 @@ void OperatorWithKernel::BuildPtenKernelContext( } else { // shape is in the input auto& ins_vector = ctx.inputs.at(attr_names[i]); if (ins_vector.size() == 1) { // ShapeTensor - pt_kernel_context_->EmplaceBackAttr(std::move( + pt_kernel_context->EmplaceBackAttr(std::move( experimental::MakePtenScalarArrayFromVar(*ins_vector.front()))); } else { // ShapeTensorList - pt_kernel_context_->EmplaceBackAttr(std::move( + pt_kernel_context->EmplaceBackAttr(std::move( experimental::MakePtenScalarArrayFromVarList(ins_vector))); } } @@ -1946,11 +1896,11 @@ void OperatorWithKernel::BuildPtenKernelContext( if (attr_iter != Attrs().end()) { // scalar is in the attribute auto& attr = Attrs().at(attr_names[i]); if (std::type_index(attr.type()) == std::type_index(typeid(float))) { - pt_kernel_context_->EmplaceBackAttr( + pt_kernel_context->EmplaceBackAttr( std::move(pten::Scalar(BOOST_GET_CONST(float, attr)))); } else if (std::type_index(attr.type()) == std::type_index(typeid(std::string))) { - pt_kernel_context_->EmplaceBackAttr( + pt_kernel_context->EmplaceBackAttr( std::move(pten::Scalar(BOOST_GET_CONST(std::string, attr)))); } else { PADDLE_THROW(platform::errors::Unimplemented( @@ -1960,7 +1910,7 @@ void OperatorWithKernel::BuildPtenKernelContext( } } else { auto& ins_vector = ctx.inputs.at(attr_names[i]); - pt_kernel_context_->EmplaceBackAttr(std::move( + pt_kernel_context->EmplaceBackAttr(std::move( experimental::MakePtenScalarFromVar(*ins_vector.front()))); } @@ -1968,17 +1918,17 
@@ void OperatorWithKernel::BuildPtenKernelContext( // TODO(chenweihang): support other attrs later auto& attr = Attrs().at(attr_names[i]); if (attr_defs[i].type_index == std::type_index(typeid(int))) { - pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(int, attr)); + pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(int, attr)); } else if (attr_defs[i].type_index == std::type_index(typeid(float))) { - pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(float, attr)); + pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(float, attr)); } else if (attr_defs[i].type_index == std::type_index(typeid(bool))) { - pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(bool, attr)); + pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(bool, attr)); } else if (attr_defs[i].type_index == std::type_index(typeid(pten::DataType))) { auto data_type = pten::TransToPtenDataType( static_cast( BOOST_GET_CONST(int, attr))); - pt_kernel_context_->EmplaceBackAttr(data_type); + pt_kernel_context->EmplaceBackAttr(data_type); } else if (attr_defs[i].type_index == std::type_index(typeid(std::vector))) { if (std::type_index(attr.type()) == @@ -1987,7 +1937,7 @@ void OperatorWithKernel::BuildPtenKernelContext( const auto& vector_int_attr = BOOST_GET_CONST(std::vector, attr); const std::vector vector_int64_attr(vector_int_attr.begin(), vector_int_attr.end()); - pt_kernel_context_->EmplaceBackAttr(vector_int64_attr); + pt_kernel_context->EmplaceBackAttr(vector_int64_attr); } // TODO(YuanRisheng) Need support vector attr @@ -2001,20 +1951,16 @@ void OperatorWithKernel::BuildPtenKernelContext( } } -void OperatorWithKernel::WriteBackToOutputs(RuntimeContext* ctx) const { - // auto& input_names = std::get<0>(pt_kernel_signature_->args); - // auto& attr_names = std::get<1>(pt_kernel_signature_->args); +void OperatorWithKernel::WriteBackToOutputs( + RuntimeContext* ctx, pten::KernelContext* pt_kernel_context) const { auto& output_names = std::get<2>(pt_kernel_signature_->args); - // pt_kernel_context_ - for (size_t i = 0; i < output_names.size(); ++i) { auto& outs_vector = ctx->outputs.at(output_names[i]); - auto& range_pair = pt_kernel_context_->OutputRangeAt(i); - auto pten_outs = - pt_kernel_context_->MutableOutputBetween( - range_pair.first, range_pair.second); + auto& range_pair = pt_kernel_context->OutputRangeAt(i); + auto pten_outs = pt_kernel_context->MutableOutputBetween( + range_pair.first, range_pair.second); for (size_t j = 0; j < pten_outs.size(); ++j) { if (pten_outs[j]) { diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 12946b416cf9f..3aab9165eae0a 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -589,16 +589,14 @@ class OperatorWithKernel : public OperatorBase { void ChoosePtenKernel(const ExecutionContext& ctx) const; void BuildPtenKernelContext(const RuntimeContext& ctx, - platform::DeviceContext* dev_ctx) const; + platform::DeviceContext* dev_ctx, + pten::KernelContext* pt_kernel_context) const; - void WriteBackToOutputs(RuntimeContext* ctx) const; + void WriteBackToOutputs(RuntimeContext* ctx, + pten::KernelContext* pt_kernel_context) const; pten::Kernel* PtenKernel() const { return pt_kernel_.get(); } - pten::KernelContext* PtenKernelContext() const { - return pt_kernel_context_.get(); - } - const OpKernelType* kernel_type() const { return kernel_type_.get(); } private: @@ -657,9 +655,6 @@ class OperatorWithKernel : public OperatorBase { mutable bool run_pten_kernel_ = false; mutable std::unique_ptr pt_kernel_signature_; 
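// NOTE(reviewer): the member removed just below was mutable state shared
// across Run() calls, which is also why the DataLoader workaround deleted at
// the end of this commit is no longer needed. Assumed before/after contrast:
//
//   // before: one cached context, reused and ClearData()'d on every run
//   mutable std::unique_ptr<pten::KernelContext> pt_kernel_context_;
//   // after: no member at all -- RunImpl declares pten::KernelContext on
//   // its own stack, so concurrent tracers cannot race on shared state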
mutable std::unique_ptr pt_kernel_; - // In order to reduce the compatibility phase - // performance overhead, temporarily cache KernelContext - mutable std::unique_ptr pt_kernel_context_; }; extern bool OpSupportGPU(const std::string& op_type); diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index d8ee400e35082..cc7fcf455a13d 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -409,8 +409,6 @@ void VarBase::_CopyGradientFrom(const VarBase& src) { } } -pten::KernelContext OpBase::pt_kernel_context_; - void OpBase::SetType(const std::string& type) { op_ = framework::OpRegistry::CreateOp(type, {}, {}, {}, false); } @@ -426,8 +424,7 @@ static void OpBaseRunImpl(const framework::OperatorBase& op, const NameVarMap& outs, const framework::AttributeMap& attrs, const framework::AttributeMap& default_attrs, - const platform::Place& place, - pten::KernelContext* pt_kernel_context) { + const platform::Place& place) { auto* op_kernel = dynamic_cast(&op); PADDLE_ENFORCE_NOT_NULL( op_kernel, platform::errors::PermissionDenied( @@ -468,8 +465,8 @@ static void OpBaseRunImpl(const framework::OperatorBase& op, * after the execution of op, but the original input is directly * overwritten in the previous dynamic graph implemention. */ - auto prepared_op = PreparedOp::Prepare(ins, outs, *op_kernel, place, attrs, - default_attrs, pt_kernel_context); + auto prepared_op = + PreparedOp::Prepare(ins, outs, *op_kernel, place, attrs, default_attrs); auto tmp_ins_ptr = PrepareData(*op_kernel, ins, prepared_op.kernel_type()); if (tmp_ins_ptr == nullptr) { @@ -497,8 +494,7 @@ void OpBase::Run(const framework::OperatorBase& op, const framework::AttributeMap& attrs, const framework::AttributeMap& default_attrs, const platform::Place& place) { - OpBaseRunImpl(op, ins, outs, attrs, default_attrs, place, - &pt_kernel_context_); + OpBaseRunImpl(op, ins, outs, attrs, default_attrs, place); } void OpBase::Run(const framework::OperatorBase& op, @@ -507,8 +503,7 @@ void OpBase::Run(const framework::OperatorBase& op, const framework::AttributeMap& attrs, const framework::AttributeMap& default_attrs, const platform::Place& place) { - OpBaseRunImpl(op, ins, outs, attrs, default_attrs, place, - &pt_kernel_context_); + OpBaseRunImpl(op, ins, outs, attrs, default_attrs, place); } void ClearNoNeedBufferInputs(OpBase* op) { diff --git a/paddle/fluid/imperative/op_base.h b/paddle/fluid/imperative/op_base.h index cb76a82353282..3d0847605566b 100644 --- a/paddle/fluid/imperative/op_base.h +++ b/paddle/fluid/imperative/op_base.h @@ -183,8 +183,6 @@ class OpBase { const framework::AttributeMap& default_attrs, const platform::Place& place); - static pten::KernelContext* GetKernelContext() { return &pt_kernel_context_; } - bool HasVoidFunctionPostHook() const { return !void_function_post_hooks_.empty(); } @@ -212,9 +210,6 @@ class OpBase { std::unique_ptr op_; platform::Place place_; size_t id_{-1UL}; - // In order to reduce the compatibility phase - // performance overhead, temporarily cache KernelContext - static pten::KernelContext pt_kernel_context_; std::vector>> void_function_post_hooks_; }; diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 46e974c8f43f3..15a278c2e6464 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -117,7 +117,6 @@ PreparedOp::PreparedOp(const framework::OperatorBase& op, const framework::OpKernelType& kernel_type, const 
framework::KernelSignature& kernel_signature, const pten::Kernel& pt_kernel, - pten::KernelContext* pt_kernel_context, platform::DeviceContext* dev_ctx) : op_(op), ctx_(ctx), @@ -126,8 +125,7 @@ PreparedOp::PreparedOp(const framework::OperatorBase& op, dev_ctx_(dev_ctx), run_pten_kernel_(true), pt_kernel_signature_(kernel_signature), - pt_kernel_(pt_kernel), - pt_kernel_context_(pt_kernel_context) {} + pt_kernel_(pt_kernel) {} template PreparedOp PrepareImpl(const NameVarMap& ins, @@ -135,8 +133,7 @@ PreparedOp PrepareImpl(const NameVarMap& ins, const framework::OperatorWithKernel& op, const platform::Place& place, const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs, - pten::KernelContext* pt_kernel_context) { + const framework::AttributeMap& default_attrs) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = pool.Get(place); @@ -178,7 +175,7 @@ PreparedOp PrepareImpl(const NameVarMap& ins, // TODO(chenweihang): using CPUKernel when miss device kernel case return PreparedOp(op, ctx, expected_kernel_key, pt_kernel_signature, - pt_kernel, pt_kernel_context, dev_ctx); + pt_kernel, dev_ctx); } else { VLOG(6) << "Dynamic mode ChoosePtenKernel - kernel `" << pt_kernel_name << "` not found."; @@ -247,10 +244,8 @@ PreparedOp PreparedOp::Prepare(const NameVarMap& ins, const framework::OperatorWithKernel& op, const platform::Place& place, const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs, - pten::KernelContext* pt_kernel_context) { - return PrepareImpl(ins, outs, op, place, attrs, default_attrs, - pt_kernel_context); + const framework::AttributeMap& default_attrs) { + return PrepareImpl(ins, outs, op, place, attrs, default_attrs); } PreparedOp PreparedOp::Prepare(const NameVarMap& ins, @@ -258,10 +253,9 @@ PreparedOp PreparedOp::Prepare(const NameVarMap& ins, const framework::OperatorWithKernel& op, const platform::Place& place, const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs, - pten::KernelContext* pt_kernel_context) { + const framework::AttributeMap& default_attrs) { return PrepareImpl(ins, outs, op, place, attrs, - default_attrs, pt_kernel_context); + default_attrs); } template @@ -271,13 +265,6 @@ static void BuildDygraphPtenKernelContext( const NameVarMap& outs, const framework::AttributeMap& attrs, const framework::AttributeMap& default_attrs, platform::DeviceContext* dev_ctx, pten::KernelContext* kernel_ctx) { - // TODO(chenweihang): now only work for very simple case, - // many cases need to be deal with later: - // 1. the input and output are not tensor - // 2. the dispensbale, duplicable input and output - // 3. needless attributes remove - // 4. use pt Tensor directly - // 5. kernel input is not DenseTensor kernel_ctx->SetDeviceContext(dev_ctx); auto& input_names = std::get<0>(pt_kernel_signature.args); @@ -312,26 +299,11 @@ static void BuildDygraphPtenKernelContext( size_t start_idx = (i == 0 ? 0 : kernel_ctx->InputRangeAt(i - 1).second); size_t end_idx = start_idx + ins_vector.size(); - auto current_vector_size = kernel_ctx->InputsSize(); - // If the memory needed is less than the current memory allocated, we will - // reuse the current memory by using ReMakePtenDenseTensorFromVar. - // Otherwise,we will create new storage. 
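// NOTE(reviewer): with the pt_kernel_context parameter dropped from
// Prepare() above, a dygraph call site simplifies to the following (wiring
// taken from the layer.cc hunk in this commit):
//
//   auto prepared_op =
//       PreparedOp::Prepare(ins, outs, *op_kernel, place, attrs, default_attrs);
//   prepared_op.Run(ins, outs, attrs, default_attrs);
//   // Run() builds and destroys its own pten::KernelContext internally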
for (size_t offset = 0; offset < ins_vector.size(); ++offset) { const auto& variable = ins_vector[offset]->Var(); - if (current_vector_size > start_idx + offset) { - auto& input_ptr = kernel_ctx->MutableInputPtrAt(start_idx + offset); - if (input_ptr == nullptr) { - input_ptr = experimental::MakePtenTensorBaseFromVar(variable, in_def); - } else { - experimental::ReMakePtenDenseTensorFromVar( - variable, in_def, kernel_ctx->MutableInputAt( - start_idx + offset)); - } - } else { - kernel_ctx->EmplaceBackInputWithoutSetRange( - experimental::MakePtenTensorBaseFromVar(variable, in_def)); - } + kernel_ctx->EmplaceBackInputWithoutSetRange( + paddle::experimental::MakePtenTensorBaseFromVar(variable, in_def)); } kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i); } @@ -340,15 +312,10 @@ static void BuildDygraphPtenKernelContext( auto& out_def = output_defs.at(i); size_t start_idx = (i == 0 ? 0 : kernel_ctx->OutputRangeAt(i - 1).second); - auto current_vector_size = kernel_ctx->OutputsSize(); auto iter = outs.find(output_names[i]); if (iter == outs.end()) { - if (current_vector_size > start_idx) { - kernel_ctx->SetOutputWithoutSetRange(start_idx, {nullptr}); - } else { - kernel_ctx->EmplaceBackOutputWithoutSetRange({nullptr}); - } + kernel_ctx->EmplaceBackOutputWithoutSetRange({nullptr}); kernel_ctx->AssignOutputRange(std::make_pair(start_idx, start_idx + 1), i); continue; @@ -357,27 +324,10 @@ static void BuildDygraphPtenKernelContext( auto& outs_vector = iter->second; size_t end_idx = start_idx + outs_vector.size(); - // If the memory needed is less than the current memory allocated, we will - // reuse the current memory by using ReMakePtenDenseTensorFromVar. - // Otherwise,we will create new storage. for (size_t offset = 0; offset < outs_vector.size(); ++offset) { - if (current_vector_size > start_idx + offset) { - auto* buffer_tensor = - kernel_ctx->MutableOutputAt(start_idx + offset); - if (buffer_tensor) { - experimental::ReMakePtenDenseTensorFromVar( - outs_vector[offset]->MutableVar(), out_def, buffer_tensor); - } else { - kernel_ctx->SetOutputWithoutSetRange( - start_idx + offset, - experimental::MakePtenTensorBaseFromVar( - outs_vector[offset]->MutableVar(), out_def)); - } - } else { - kernel_ctx->EmplaceBackOutputWithoutSetRange( - experimental::MakePtenTensorBaseFromVar( - outs_vector[offset]->MutableVar(), out_def)); - } + kernel_ctx->EmplaceBackOutputWithoutSetRange( + paddle::experimental::MakePtenTensorBaseFromVar( + outs_vector[offset]->MutableVar(), out_def)); } kernel_ctx->AssignOutputRange(std::make_pair(start_idx, end_idx), i); } @@ -556,19 +506,20 @@ static void PreparedOpRunPtImpl( const framework::OperatorBase& op, const framework::OpKernelType& kernel_type, const framework::KernelSignature& pt_kernel_signature, - const pten::Kernel& pt_kernel, pten::KernelContext* pt_kernel_context, - platform::DeviceContext* dev_ctx, const NameVarMap& ins, - const NameVarMap& outs, const framework::AttributeMap& attrs, + const pten::Kernel& pt_kernel, platform::DeviceContext* dev_ctx, + const NameVarMap& ins, const NameVarMap& outs, + const framework::AttributeMap& attrs, const framework::AttributeMap& default_attrs) { DygraphInferShapeContext infer_shape_ctx( &ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type); op.Info().infer_shape_(&infer_shape_ctx); + pten::KernelContext pt_kernel_context; BuildDygraphPtenKernelContext(pt_kernel_signature, pt_kernel, ins, outs, attrs, default_attrs, dev_ctx, - pt_kernel_context); + &pt_kernel_context); - 
pt_kernel(pt_kernel_context); + pt_kernel(&pt_kernel_context); if (FLAGS_benchmark) { dev_ctx->Wait(); @@ -578,10 +529,7 @@ static void PreparedOpRunPtImpl( #endif } - WriteBackToOutputs(pt_kernel_signature, outs, pt_kernel_context); - - // Ensure that it does not affect the VarBase life cycle management - pt_kernel_context->ClearData(); + WriteBackToOutputs(pt_kernel_signature, outs, &pt_kernel_context); // TODO(chenweihang): add debug flags later if (framework::IsComplexType(kernel_type.data_type_)) { @@ -595,8 +543,8 @@ void PreparedOp::Run(const NameVarMap& ins, const framework::AttributeMap& default_attrs) { if (run_pten_kernel_) { PreparedOpRunPtImpl(op_, kernel_type_, pt_kernel_signature_, - pt_kernel_, pt_kernel_context_, dev_ctx_, ins, - outs, attrs, default_attrs); + pt_kernel_, dev_ctx_, ins, outs, attrs, + default_attrs); } else { PreparedOpRunImpl(op_, ctx_, kernel_type_, func_, dev_ctx_, ins, outs, attrs, default_attrs); @@ -609,8 +557,8 @@ void PreparedOp::Run(const NameVarMap& ins, const framework::AttributeMap& default_attrs) { if (run_pten_kernel_) { PreparedOpRunPtImpl( - op_, kernel_type_, pt_kernel_signature_, pt_kernel_, pt_kernel_context_, - dev_ctx_, ins, outs, attrs, default_attrs); + op_, kernel_type_, pt_kernel_signature_, pt_kernel_, dev_ctx_, ins, + outs, attrs, default_attrs); } else { PreparedOpRunImpl(op_, ctx_, kernel_type_, func_, dev_ctx_, ins, outs, attrs, default_attrs); diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index 29747e79ef6fa..22f016e2cadc1 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -153,25 +153,21 @@ class PreparedOp { const framework::RuntimeContext& ctx, const framework::OpKernelType& kernel_type, const framework::KernelSignature& kernel_signature, - const pten::Kernel& pt_kernel, - pten::KernelContext* pt_kernel_context, - platform::DeviceContext* dev_ctx); + const pten::Kernel& pt_kernel, platform::DeviceContext* dev_ctx); static PreparedOp Prepare(const NameVarMap& ins, const NameVarMap& outs, const framework::OperatorWithKernel& op, const platform::Place& place, const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs, - pten::KernelContext* pt_kernel_context = nullptr); + const framework::AttributeMap& default_attrs); static PreparedOp Prepare(const NameVarMap& ins, const NameVarMap& outs, const framework::OperatorWithKernel& op, const platform::Place& place, const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs, - pten::KernelContext* pt_kernel_context = nullptr); + const framework::AttributeMap& default_attrs); void Run(const NameVarMap& in, const NameVarMap& out, const framework::AttributeMap& attrs, @@ -196,9 +192,6 @@ class PreparedOp { bool run_pten_kernel_{false}; framework::KernelSignature pt_kernel_signature_; pten::Kernel pt_kernel_; - // In order to reduce the compatibility phase - // performance overhead, temporarily cache KernelContext - pten::KernelContext* pt_kernel_context_; }; } // namespace imperative diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 682916a9b323b..7ed9f08906a73 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -231,8 +231,6 @@ void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins, OpBase::Run(*op, new_ins, outs, attrs, default_attrs, place); } catch (platform::EnforceNotMet& exception) { framework::AppendErrorOpHint(type, 
&exception); - // Compatible impl: clear pten kernel context data when throw error - OpBase::GetKernelContext()->ClearData(); throw std::move(exception); } catch (std::exception& ex) { PADDLE_THROW(platform::errors::Fatal( diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index 10a9358612960..a3e6ea6d1bc78 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -202,22 +202,6 @@ def _thread_loop(self, legacy_expected_place): # APIs in this thread. _set_expected_place(legacy_expected_place) - # NOTE(chenweihang): [ Why need to set not to execute pten kernel here? ] - # Now, in order to ensure that the execution performance of the dynamic - # graph mode in pten compatible state does not decline significantly, - # we have adopted the approach of caching a KernelContext globally for - # the dynamic graph tracer to reduce the construction and deconstruction - # overhead of data interfaces such as the compatible state DenseTensor. - # The static graph is each op caches a KernelContext, but the op of - # the dynamic graph will be constructed and destroyed every round of - # execution, so it is impossible to cache KernelContext for each op. - # However, it is not thread-safe if using only one global kernel context in - # dynamic graph. If the pten op of paddle is used in the DataLoader thread, - # it may cause access errors. We temporarily do not execute pten kernel - # in this scenario and will find a better solution later and remove - # this setting. - set_flags({'FLAGS_run_pten_kernel': False}) - while not self._thread_done_event.is_set(): try: indices = next(self._sampler_iter) @@ -519,9 +503,6 @@ def _thread_loop(self, legacy_expected_place): # APIs in this thread. _set_expected_place(legacy_expected_place) - # NOTE(chenweihang): See Note [ Why need to set not to execute pten kernel here? 
] - set_flags({'FLAGS_run_pten_kernel': False}) - while not self._thread_done_event.is_set(): batch = self._get_data() if not self._thread_done_event.is_set(): From d13c779900b2cdab89d21e57f87ec571b8a441e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Sat, 15 Jan 2022 20:03:54 +0800 Subject: [PATCH 09/10] isolates friends of storage, test=develop (#38977) --- paddle/pten/api/lib/utils/tensor_utils.cc | 45 +++++------------------ paddle/pten/core/compat_utils.h | 9 +---- 2 files changed, 10 insertions(+), 44 deletions(-) diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index edd5cde938630..f304268bedf45 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -325,9 +325,7 @@ void SharesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) { platform::errors::InvalidArgument( "The destination Tensor is nullptr when move allocation.")); dst->Resize(src->dims()); - auto* storage = static_cast( - pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(src)); - dst->ResetHolderWithType(storage->GetAllocation(), + dst->ResetHolderWithType(src->Holder(), pten::TransToProtoVarType(src->dtype())); dst->set_offset(src->meta().offset); } @@ -345,19 +343,7 @@ void ReMakePtenDenseTensorBase(const paddle::framework::Tensor& src, meta->dtype = pten::TransToPtenDataType(src.type()); meta->layout = src.layout(); meta->offset = src.offset(); - - auto* shared_storage = static_cast( - pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); - PADDLE_ENFORCE_NOT_NULL( - shared_storage, - platform::errors::NotFound( - "Target DenseTensor's shared storage is nullptr.")); - - PADDLE_ENFORCE_EQ(src.IsInitialized(), - true, - paddle::platform::errors::InvalidArgument( - "Source Tensor is not initialized.")); - shared_storage->ResetAllocation(src.Holder()); + dst->ResetHolder(src.Holder()); } void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, @@ -378,19 +364,12 @@ void ReMakePtenDenseTensorByArgDefBase(const paddle::framework::Tensor& src, meta->layout = src.layout(); meta->offset = src.offset(); - auto* shared_storage = static_cast( - pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); - PADDLE_ENFORCE_NOT_NULL( - shared_storage, - platform::errors::NotFound( - "Target DenseTensor's shared storage is nullptr.")); - if (src.IsInitialized() && src.place() == pten::TransToFluidPlace(arg_def.backend)) { - shared_storage->ResetAllocation(src.Holder()); + dst->ResetHolder(src.Holder()); } else { - shared_storage->ResetAllocationPlace( - pten::TransToFluidPlace(arg_def.backend)); + // This does not affect the correctness, and will be modified immediately. 
+ // dst->mutable_data(pten::TransToFluidPlace(arg_def.backend)); } } @@ -481,14 +460,10 @@ void MakeVariableFromPtenTensor(pten::DenseTensor* src, tensor->Resize(src->dims()); SetLoD(tensor->mutable_lod(), src->lod()); - // here dynamic_cast is slow - auto* storage = static_cast( - pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(src)); - if (!tensor->IsInitialized() || (tensor->IsInitialized() && - !IsSameAllocation(tensor->Holder(), storage->GetAllocation()))) { - tensor->ResetHolderWithType(std::move(storage->GetAllocation()), dtype); + !IsSameAllocation(tensor->Holder(), src->Holder()))) { + tensor->ResetHolderWithType(std::move(src->Holder()), dtype); } else { // Even the pten tensor and Variable have the same Alloctation (both have // the same pointer address, same size and same place) @@ -502,10 +477,8 @@ void MakeVariableFromPtenTensor(pten::DenseTensor* src, auto dtype = pten::TransToProtoVarType(src->dtype()); if (!tensor->value().IsInitialized()) { - auto storage = dynamic_cast( - pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(src)); - tensor->mutable_value()->ResetHolderWithType( - std::move(storage->GetAllocation()), dtype); + tensor->mutable_value()->ResetHolderWithType(std::move(src->Holder()), + dtype); } } else { PADDLE_THROW(platform::errors::Unimplemented( diff --git a/paddle/pten/core/compat_utils.h b/paddle/pten/core/compat_utils.h index 0bd82080ddebc..46e53e3997cc1 100644 --- a/paddle/pten/core/compat_utils.h +++ b/paddle/pten/core/compat_utils.h @@ -31,10 +31,6 @@ namespace pten { class CompatibleDenseTensorUtils { public: - static Storage* UnsafeGetMutableStorage(DenseTensor* tensor) { - return tensor->storage_.get(); - } - static DenseTensorMeta* GetMutableMeta(DenseTensor* tensor) { return &(tensor->meta_); } @@ -42,10 +38,7 @@ class CompatibleDenseTensorUtils { // only can deal with SharedStorage now static void ClearStorage(DenseTensor* tensor) { // use static_cast to improve performance, replace by dynamic_cast later - if (tensor->storage_ != nullptr) { - static_cast(tensor->storage_.get()) - ->Reset(); - } + tensor->MoveMemoryHolder(); } static DenseTensor Slice(const DenseTensor& tensor, From 5c3586746792056d86a72f114167103f98b3af29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Sat, 15 Jan 2022 21:58:35 +0800 Subject: [PATCH 10/10] updates the ctor of tensor, test=develop (#38946) --- .../accumulation_node_test.cc | 15 ++- .../autograd_meta_test.cc | 5 +- .../data_structure_tests/eager_tensor_test.cc | 10 +- .../grad_node_info_test.cc | 10 +- .../data_structure_tests/grad_node_test.h | 5 +- .../grad_tensor_holder_test.cc | 15 ++- .../tensor_wrapper_test.cc | 10 +- .../tests/task_tests/eager_utils_test.cc | 15 ++- paddle/pten/api/lib/utils/allocator.h | 16 +-- paddle/pten/core/dense_tensor.cc | 6 +- paddle/pten/core/dense_tensor.h | 6 +- paddle/pten/core/storage.cc | 2 +- paddle/pten/core/storage.h | 19 +-- paddle/pten/tests/api/CMakeLists.txt | 2 - paddle/pten/tests/api/test_cast_api.cc | 4 +- paddle/pten/tests/api/test_conj_api.cc | 4 +- paddle/pten/tests/api/test_dot_api.cc | 6 +- paddle/pten/tests/api/test_elementwise_api.cc | 24 ++-- paddle/pten/tests/api/test_empty_api.cc | 12 +- paddle/pten/tests/api/test_fill_api.cc | 22 ++-- paddle/pten/tests/api/test_flatten_api.cc | 4 +- paddle/pten/tests/api/test_matmul_api.cc | 20 +-- paddle/pten/tests/api/test_mean_api.cc | 4 +- paddle/pten/tests/api/test_reshape_api.cc | 4 +- paddle/pten/tests/api/test_storage.cc 
| 65 --------- paddle/pten/tests/api/test_sum_api.cc | 4 +- paddle/pten/tests/api/test_tensor_utils.cc | 124 ------------------ paddle/pten/tests/api/test_to_api.cc | 4 +- paddle/pten/tests/core/CMakeLists.txt | 2 - paddle/pten/tests/core/allocator.h | 67 +--------- paddle/pten/tests/core/test_allocator.cc | 95 -------------- paddle/pten/tests/core/test_dense_tensor.cc | 13 +- paddle/pten/tests/core/test_storage.cc | 40 ------ .../pten/tests/kernels/test_cast_dev_api.cc | 4 +- .../pten/tests/kernels/test_conj_dev_api.cc | 4 +- .../pten/tests/kernels/test_copy_dev_api.cc | 6 +- .../tests/kernels/test_creation_dev_api.cc | 8 +- paddle/pten/tests/kernels/test_dot_dev_api.cc | 6 +- .../tests/kernels/test_elementwise_dev_api.cc | 24 ++-- .../tests/kernels/test_flatten_dev_api.cc | 4 +- .../pten/tests/kernels/test_matmul_dev_api.cc | 6 +- .../pten/tests/kernels/test_mean_dev_api.cc | 4 +- .../tests/kernels/test_reshape_dev_api.cc | 4 +- .../pten/tests/kernels/test_scale_dev_api.cc | 13 +- paddle/pten/tests/kernels/test_sum_dev_api.cc | 4 +- 45 files changed, 175 insertions(+), 566 deletions(-) delete mode 100644 paddle/pten/tests/api/test_storage.cc delete mode 100644 paddle/pten/tests/api/test_tensor_utils.cc delete mode 100644 paddle/pten/tests/core/test_allocator.cc delete mode 100644 paddle/pten/tests/core/test_storage.cc diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc index f249d2099f24c..cdc9701009513 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc @@ -32,15 +32,17 @@ TEST(AccumulationNode, EagerTensor) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT16, paddle::framework::make_ddim({1, 1})); std::shared_ptr dt0 = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); dt0->mutable_data()[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); dt1->mutable_data()[0] = 20.0; @@ -48,8 +50,9 @@ TEST(AccumulationNode, EagerTensor) { std::shared_ptr grad_dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); EagerTensor grad_et = EagerTensor(grad_dt); diff --git a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc index 96845569ca0c5..3d45dc831d411 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc @@ -42,8 +42,9 @@ TEST(AutogradMeta, MemberFunction) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); dt_ptr[0] = 5.0f; diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc index 84daf4eac4ce6..a483ddb6a98f6 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ 
b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -36,8 +36,9 @@ TEST(EagerTensor, Constructor) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); dt_ptr[0] = 5.0f; @@ -65,8 +66,9 @@ TEST(EagerTensor, MemberFunction) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); dt_ptr[0] = 5.0f; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index a89fb019d5b37..7f6609b88a527 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -41,8 +41,9 @@ TEST(GradNodeInfo, GradNodeBase) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); dt_ptr[0] = 5.0f; @@ -97,8 +98,9 @@ TEST(GradNodeInfo, GradNodeBase) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); dt_ptr[0] = 6.0f; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index 2870bfa8b0c94..433a00e27be0e 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -37,8 +37,9 @@ class GradTestNode : public egr::GradNodeBase { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); dt_ptr[0] = 6.0f; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc index 3581ef59cd5be..c88a5f5fdcef5 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc @@ -36,8 +36,9 @@ TEST(GradTensorHolder, Constructor) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({2, 2})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); EagerTensor et = EagerTensor(dt); @@ -52,15 +53,17 @@ TEST(GradTensorHolder, Interfaces) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, 
paddle::framework::make_ddim({1, 1})); std::shared_ptr dt0 = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); dt0->mutable_data()[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); dt1->mutable_data()[0] = 20.0; EagerTensor et1 = EagerTensor(dt1); diff --git a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc index 6d78cf42d0c48..8bc739d455a95 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc @@ -25,8 +25,9 @@ TEST(TensorWrapper, Basic) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); dt_ptr[0] = 5.0f; @@ -51,8 +52,9 @@ TEST(TensorWrapper, Basic) { pten::DenseTensorMeta meta2 = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt2 = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta2); auto* dt_ptr2 = dt->mutable_data(); dt_ptr2[0] = 6.0f; diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index ea9aae83ff189..1b2f1287b069d 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -31,15 +31,17 @@ TEST(EagerUtils, AutoGradMeta) { pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); std::shared_ptr dt0 = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); dt0->mutable_data()[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); dt1->mutable_data()[0] = 20.0; EagerTensor et1 = EagerTensor(dt1); @@ -106,8 +108,9 @@ egr::EagerTensor CreateTestCPUTensor(T val, pten::DenseTensorMeta(pten::DataType::FLOAT32, ddim); egr::EagerTensor tensor; std::shared_ptr dt = std::make_shared( - std::make_shared( - paddle::platform::CPUPlace()), + std::make_unique( + paddle::platform::CPUPlace()) + .get(), meta); auto* dt_ptr = dt->mutable_data(); for (int64_t i = 0; i < dt->numel(); i++) { diff --git a/paddle/pten/api/lib/utils/allocator.h b/paddle/pten/api/lib/utils/allocator.h index a8c05b7651689..acdba822ac4bb 100644 --- a/paddle/pten/api/lib/utils/allocator.h +++ b/paddle/pten/api/lib/utils/allocator.h @@ -22,25 +22,15 @@ limitations under the License. 
*/ namespace paddle { namespace experimental { -class DefaultAllocator : public pten::deprecated::Allocator { +class DefaultAllocator : public pten::Allocator { public: - using Allocation = pten::deprecated::Allocation; explicit DefaultAllocator(const paddle::platform::Place& place) : place_(place) {} - static void Delete(Allocation* allocation) { - paddle::memory::allocation::Allocator::AllocationDeleter( - allocation->CastContextWithoutCheck()); + AllocationPtr Allocate(size_t bytes_size) override { + return memory::Alloc(place_, bytes_size); } - Allocation Allocate(size_t bytes_size) override { - paddle::memory::AllocationPtr a = memory::Alloc(place_, bytes_size); - void* ptr = a->ptr(); - return Allocation(ptr, a.release(), &Delete, place_); - } - - const paddle::platform::Place& place() override { return place_; } - private: paddle::platform::Place place_; }; diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index eb6f834d72779..716e1ac3d30bb 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -24,13 +24,11 @@ limitations under the License. */ namespace pten { -DenseTensor::DenseTensor(const std::shared_ptr& a, - const DenseTensorMeta& meta) +DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta) : meta_(meta), storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} -DenseTensor::DenseTensor(const std::shared_ptr& a, - DenseTensorMeta&& meta) +DenseTensor::DenseTensor(Allocator* a, DenseTensorMeta&& meta) : meta_(std::move(meta)), storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index 4f25fc296724c..db8d7a2a39c90 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -60,17 +60,15 @@ class TensorInplaceVersion { class DenseTensor : public TensorBase, public TypeInfoTraits { public: - using Allocator = deprecated::Allocator; - /// \brief Construct a dense tensor and allocate space. /// \param a The allocator used to allocate space. /// \param meta The meta data of dense tensor. - DenseTensor(const std::shared_ptr& a, const DenseTensorMeta& meta); + DenseTensor(Allocator* a, const DenseTensorMeta& meta); /// \brief Construct a dense tensor and allocate space. /// \param a The allocator used to allocate space. /// \param meta The meta data of dense tensor. - DenseTensor(const std::shared_ptr& a, DenseTensorMeta&& meta); + DenseTensor(Allocator* a, DenseTensorMeta&& meta); /// \brief Use existing storage space to create dense tensor. This interface /// can be used to deliberately create an uninitialized dense tensor. 
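(For orientation, a minimal sketch of the new constructor contract introduced above. This block is illustrative only and is not part of the patch: the helper name ctor_sketch and the tensor shape are invented here, template arguments are written out in full, and it assumes the DefaultAllocator defined earlier in this commit.)

#include <memory>

#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"

void ctor_sketch() {
  pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                             paddle::framework::make_ddim({3, 4}),
                             pten::DataLayout::NCHW);

  // After this commit the caller owns the allocator and hands the tensor a
  // raw pointer; the allocator must outlive every tensor built from it.
  auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
  pten::DenseTensor dense_x(alloc.get(), meta);

  // Before this commit the equivalent code shared ownership instead:
  //   auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
  //       paddle::platform::CPUPlace());
  //   pten::DenseTensor dense_x(alloc, meta);

  float* data = dense_x.mutable_data<float>();
  data[0] = 1.0f;
}

(The test updates in the hunks below are exactly this substitution, make_shared replaced by make_unique plus .get(); the design trades per-tensor shared_ptr reference counting for an explicit, caller-managed allocator lifetime.)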
diff --git a/paddle/pten/core/storage.cc b/paddle/pten/core/storage.cc index f7c7f68734101..aacae7be88349 100644 --- a/paddle/pten/core/storage.cc +++ b/paddle/pten/core/storage.cc @@ -18,7 +18,7 @@ namespace pten { void TensorStorage::Realloc(size_t size) { this->Clear(); - data_ = paddle::memory::AllocShared(alloc_->place(), size); + data_ = alloc_->Allocate(size); size_ = size; } diff --git a/paddle/pten/core/storage.h b/paddle/pten/core/storage.h index cf18dd913093a..97d7f8d0f1105 100644 --- a/paddle/pten/core/storage.h +++ b/paddle/pten/core/storage.h @@ -91,12 +91,11 @@ class Storage : public intrusive_ref_counter { class TensorStorage : public Storage { public: using Place = paddle::platform::Place; - using Allocator = deprecated::Allocator; - explicit TensorStorage(const std::shared_ptr& a) : alloc_(a) {} + explicit TensorStorage(Allocator* a) : alloc_(a) {} - TensorStorage(const std::shared_ptr& a, size_t size) - : Storage(paddle::memory::AllocShared(a->place(), size)), alloc_(a) { + TensorStorage(Allocator* a, size_t size) + : Storage(a->Allocate(size)), alloc_(a) { size_ = data_->size(); } @@ -114,24 +113,18 @@ class TensorStorage : public Storage { size_t size() const noexcept override { return size_; } const Place& place() const override { - if (!data_ && !alloc_) { + if (!data_) { PADDLE_THROW(paddle::platform::errors::Unimplemented( "Unable to visit place: either data_ or alloc_ has to be initialized " "first.")); } - if (data_) { - return data_->place(); - } - return alloc_->place(); + return data_->place(); } bool OwnsMemory() const noexcept override { return true; } - const std::shared_ptr& allocator() const noexcept { - return alloc_; - } private: - const std::shared_ptr alloc_; + Allocator* alloc_; int64_t size_{0}; }; diff --git a/paddle/pten/tests/api/CMakeLists.txt b/paddle/pten/tests/api/CMakeLists.txt index ffbc551843148..79d9a3d82e69e 100644 --- a/paddle/pten/tests/api/CMakeLists.txt +++ b/paddle/pten/tests/api/CMakeLists.txt @@ -5,8 +5,6 @@ else() endif() cc_test(test_pten_exception SRCS test_pten_exception.cc DEPS gtest) -cc_test(test_framework_storage SRCS test_storage.cc DEPS pten_api_utils) -cc_test(test_framework_tensor_utils SRCS test_tensor_utils.cc DEPS pten_api_utils) cc_test(test_mean_api SRCS test_mean_api.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_dot_api SRCS test_dot_api.cc DEPS pten_tensor pten_api pten_api_utils) diff --git a/paddle/pten/tests/api/test_cast_api.cc b/paddle/pten/tests/api/test_cast_api.cc index c2660a1f80019..6608d1ed08cab 100644 --- a/paddle/pten/tests/api/test_cast_api.cc +++ b/paddle/pten/tests/api/test_cast_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, cast) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_conj_api.cc b/paddle/pten/tests/api/test_conj_api.cc index 928f8e414fda0..50d190257a16d 100644 --- a/paddle/pten/tests/api/test_conj_api.cc +++ b/paddle/pten/tests/api/test_conj_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, conj) { // 1. 
create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::COMPLEX64, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_dot_api.cc b/paddle/pten/tests/api/test_dot_api.cc index 41c03f8f26201..40e709b960334 100644 --- a/paddle/pten/tests/api/test_dot_api.cc +++ b/paddle/pten/tests/api/test_dot_api.cc @@ -30,17 +30,17 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, dot) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_elementwise_api.cc b/paddle/pten/tests/api/test_elementwise_api.cc index e5971aae5513f..69af32eb457a6 100644 --- a/paddle/pten/tests/api/test_elementwise_api.cc +++ b/paddle/pten/tests/api/test_elementwise_api.cc @@ -30,17 +30,17 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, add) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); @@ -84,17 +84,17 @@ TEST(API, add) { // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, subtract) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); @@ -138,17 +138,17 @@ TEST(API, subtract) { // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, divide) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); @@ -192,17 +192,17 @@ TEST(API, divide) { TEST(API, multiply) { // 1. 
create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_empty_api.cc b/paddle/pten/tests/api/test_empty_api.cc index fcc01ad8a7172..f4e3f472c7990 100644 --- a/paddle/pten/tests/api/test_empty_api.cc +++ b/paddle/pten/tests/api/test_empty_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, empty_like) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); @@ -55,11 +55,11 @@ TEST(API, empty_like) { TEST(API, empty1) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_shape = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::INT64, framework::make_ddim({2}), pten::DataLayout::NCHW)); @@ -83,11 +83,11 @@ TEST(API, empty1) { } TEST(API, empty2) { - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_scalar = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::INT32, framework::make_ddim({1}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_fill_api.cc b/paddle/pten/tests/api/test_fill_api.cc index e87d094eec9d3..0d823765680e8 100644 --- a/paddle/pten/tests/api/test_fill_api.cc +++ b/paddle/pten/tests/api/test_fill_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, full_like) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); @@ -65,10 +65,10 @@ TEST(API, full_like) { TEST(API, zeros_like) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); @@ -98,10 +98,10 @@ TEST(API, zeros_like) { TEST(API, ones_like) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::INT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); @@ -131,11 +131,11 @@ TEST(API, ones_like) { TEST(API, full1) { // 1. 
create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_shape = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::INT64, framework::make_ddim({2}), pten::DataLayout::NCHW)); @@ -144,7 +144,7 @@ TEST(API, full1) { shape_data[1] = 3; auto dense_scalar = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({1}), pten::DataLayout::NCHW)); @@ -177,11 +177,11 @@ TEST(API, full1) { } TEST(API, full2) { - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_scalar = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::INT32, framework::make_ddim({1}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_flatten_api.cc b/paddle/pten/tests/api/test_flatten_api.cc index 93c8a50f02a78..6c082b9653e6f 100644 --- a/paddle/pten/tests/api/test_flatten_api.cc +++ b/paddle/pten/tests/api/test_flatten_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, flatten) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2, 2, 3}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_matmul_api.cc b/paddle/pten/tests/api/test_matmul_api.cc index bef0e2af4cf92..03f686f1c3f5e 100644 --- a/paddle/pten/tests/api/test_matmul_api.cc +++ b/paddle/pten/tests/api/test_matmul_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; TEST(API, matmul_cpu) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); @@ -41,7 +41,7 @@ TEST(API, matmul_cpu) { auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); @@ -79,10 +79,10 @@ TEST(API, matmul_cpu) { TEST(API, matmul_cuda) { // Prepare CPU Dense Tensor const auto alloc_cpu = - std::make_shared( + std::make_unique( paddle::platform::CPUPlace()); auto ref_x = std::make_shared( - alloc_cpu, + alloc_cpu.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); @@ -90,7 +90,7 @@ TEST(API, matmul_cuda) { auto* ref_x_data = ref_x->mutable_data(); auto ref_y = std::make_shared( - alloc_cpu, + alloc_cpu.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); @@ -104,16 +104,16 @@ TEST(API, matmul_cuda) { // 1. 
create tensor const auto alloc_cuda = - std::make_shared( + std::make_unique( paddle::platform::CUDAPlace()); auto dense_x = std::make_shared( - alloc_cuda, + alloc_cuda.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); auto dense_y = std::make_shared( - alloc_cuda, + alloc_cuda.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); @@ -143,7 +143,7 @@ TEST(API, matmul_cuda) { auto dense_out = std::dynamic_pointer_cast(out.impl()); auto ref_out = std::make_shared( - alloc_cpu, + alloc_cpu.get(), pten::DenseTensorMeta( pten::DataType::FLOAT32, out.dims(), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_mean_api.cc b/paddle/pten/tests/api/test_mean_api.cc index a8c4c5306dced..9d90e58101cbd 100644 --- a/paddle/pten/tests/api/test_mean_api.cc +++ b/paddle/pten/tests/api/test_mean_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, mean) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_reshape_api.cc b/paddle/pten/tests/api/test_reshape_api.cc index 227dcc6e9568d..59e9e9fab1122 100644 --- a/paddle/pten/tests/api/test_reshape_api.cc +++ b/paddle/pten/tests/api/test_reshape_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, reshape) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2, 2, 3}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_storage.cc b/paddle/pten/tests/api/test_storage.cc deleted file mode 100644 index 1a5d95f9419c5..0000000000000 --- a/paddle/pten/tests/api/test_storage.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "gtest/gtest.h" - -#include "paddle/pten/api/lib/utils/allocator.h" -#include "paddle/pten/api/lib/utils/storage.h" - -namespace paddle { -namespace tests { - -TEST(host_storage, external_stroage) { - const size_t size{100}; - const auto a = std::make_shared( - paddle::platform::CPUPlace()); - pten::intrusive_ptr in_storage = - pten::make_intrusive(a, size); - char* data = static_cast(in_storage->data()); - for (size_t i = 0; i < size; ++i) { - data[i] = i; - } - const size_t delta{1}; - const size_t n{10}; - auto ex_storage = - pten::make_intrusive(in_storage, delta, n); - CHECK_EQ(ex_storage->size(), n); - CHECK(paddle::platform::is_cpu_place(ex_storage->place())); - CHECK(!ex_storage->OwnsMemory()); - for (size_t i = delta; i < delta + n; ++i) { - CHECK_EQ(data[i], static_cast(i)); - } -} - -TEST(host_storage, external_vector) { - std::vector data(100); - for (size_t i = 0; i < data.size(); ++i) { - data[i] = i; - } - const size_t delta{1}; - const size_t n{10}; - auto ex_storage = pten::make_intrusive( - data.data(), n, paddle::platform::CPUPlace()); - CHECK_EQ(ex_storage->size(), n); - CHECK(paddle::platform::is_cpu_place(ex_storage->place())); - CHECK(!ex_storage->OwnsMemory()); - for (size_t i = delta; i < delta + n; ++i) { - CHECK_EQ(data[i], static_cast(i)); - } -} - -} // namespace tests -} // namespace paddle diff --git a/paddle/pten/tests/api/test_sum_api.cc b/paddle/pten/tests/api/test_sum_api.cc index ff1609d3d4051..5a7c9840e1114 100644 --- a/paddle/pten/tests/api/test_sum_api.cc +++ b/paddle/pten/tests/api/test_sum_api.cc @@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, sum) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/api/test_tensor_utils.cc b/paddle/pten/tests/api/test_tensor_utils.cc deleted file mode 100644 index 041bd28ad892a..0000000000000 --- a/paddle/pten/tests/api/test_tensor_utils.cc +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "gtest/gtest.h" - -#include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/core/tensor_meta.h" - -namespace paddle { -namespace tests { - -using DDim = paddle::framework::DDim; -using DataType = paddle::experimental::DataType; -using DataLayout = paddle::experimental::DataLayout; - -using DenseTensor = pten::DenseTensor; -using DenseTensorMeta = pten::DenseTensorMeta; - -TEST(tensor_utils, dense_tensor_to_lod_tensor) { - const DDim dims({2, 1}); - const DataType dtype{DataType::FLOAT32}; - const DataLayout layout{DataLayout::NCHW}; - const pten::LoD lod{{0, 2}}; - DenseTensorMeta meta(dtype, dims, layout, lod); - - auto alloc = - std::make_shared(platform::CPUPlace()); - - DenseTensor dense_tensor(alloc, meta); - float* data = dense_tensor.mutable_data(); - data[0] = 1.0f; - data[1] = 2.1f; - - framework::LoDTensor lod_tensor; - experimental::MovesStorage(&dense_tensor, &lod_tensor); - - CHECK(dense_tensor.lod().size() == lod_tensor.lod().size()); - CHECK(dense_tensor.lod()[0] == - static_cast>((lod_tensor.lod()[0]))); - CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type())); - CHECK(dense_tensor.layout() == lod_tensor.layout()); - CHECK(platform::is_cpu_place(lod_tensor.place())); - - CHECK(lod_tensor.data()[0] == 1.0f); - CHECK(lod_tensor.data()[1] == 2.1f); - - auto dense_tensor_1 = experimental::MakePtenDenseTensor(lod_tensor); - CHECK(dense_tensor_1->dims() == dims); - CHECK(dense_tensor_1->dtype() == dtype); - CHECK(dense_tensor_1->layout() == layout); - CHECK(dense_tensor_1->lod().size() == lod.size()); - CHECK(dense_tensor_1->lod()[0] == lod[0]); - const float* data_1 = dense_tensor_1->data(); - CHECK(data_1[0] == 1.0f); - CHECK(data_1[1] == 2.1f); -} - -TEST(tensor_utils, dense_tensor_to_tensor) { - const DDim dims({2, 1}); - const DataType dtype{DataType::FLOAT32}; - const DataLayout layout{DataLayout::NCHW}; - DenseTensorMeta meta(dtype, dims, layout); - - auto alloc = - std::make_shared(platform::CPUPlace()); - - DenseTensor dense_tensor(alloc, meta); - float* data = dense_tensor.mutable_data(); - data[0] = 1.0f; - data[1] = 2.1f; - - framework::Tensor tensor; - experimental::MovesStorage(&dense_tensor, &tensor); - - CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(tensor.type())); - CHECK(dense_tensor.layout() == tensor.layout()); - CHECK(platform::is_cpu_place(tensor.place())); - - CHECK(tensor.data()[0] == 1.0f); - CHECK(tensor.data()[1] == 2.1f); - - auto dense_tensor_1 = experimental::MakePtenDenseTensor(tensor); - CHECK(dense_tensor_1->dims() == dims); - CHECK(dense_tensor_1->dtype() == dtype); - CHECK(dense_tensor_1->layout() == layout); - const float* data_1 = dense_tensor_1->data(); - CHECK(data_1[0] == 1.0f); - CHECK(data_1[1] == 2.1f); -} - -TEST(PtenUtils, VarToPtTensor) { - // 1. create Variable - paddle::framework::Variable v; - auto selected_rows = v.GetMutable(); - paddle::framework::Tensor* value = selected_rows->mutable_value(); - auto* data = value->mutable_data(paddle::framework::make_ddim({1, 1}), - paddle::platform::CPUPlace()); - data[0] = 123; - pten::Backend expect_backend = pten::Backend::CPU; - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - expect_backend = pten::Backend::GPU; -#endif - auto tensor_def = pten::TensorArgDef( - expect_backend, pten::DataLayout::NCHW, pten::DataType::INT32); - // 2. test API - auto tensor_x = experimental::MakePtenTensorBaseFromVar(v, tensor_def); - // 3. 
check result - ASSERT_EQ(tensor_x->dtype(), pten::DataType::INT32); -} - -} // namespace tests -} // namespace paddle diff --git a/paddle/pten/tests/api/test_to_api.cc b/paddle/pten/tests/api/test_to_api.cc index 47e8ff7c2c87e..9aef716029a69 100644 --- a/paddle/pten/tests/api/test_to_api.cc +++ b/paddle/pten/tests/api/test_to_api.cc @@ -28,10 +28,10 @@ namespace framework = paddle::framework; using DDim = paddle::framework::DDim; paddle::experimental::Tensor CreateInputTensor() { - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - alloc, + alloc.get(), pten::DenseTensorMeta(pten::DataType::INT64, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/core/CMakeLists.txt b/paddle/pten/tests/core/CMakeLists.txt index 9a5cfecc2917b..07554f02d9992 100644 --- a/paddle/pten/tests/core/CMakeLists.txt +++ b/paddle/pten/tests/core/CMakeLists.txt @@ -1,5 +1,3 @@ -cc_test(test_allocator SRCS test_allocator.cc DEPS tensor_base) -cc_test(test_storage SRCS test_storage.cc DEPS tensor_base) cc_test(test_dense_tensor SRCS test_dense_tensor.cc DEPS dense_tensor) cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc) cc_test(test_type_info SRCS test_type_info.cc) diff --git a/paddle/pten/tests/core/allocator.h b/paddle/pten/tests/core/allocator.h index c2c74e1aacf1f..e78f288e8e545 100644 --- a/paddle/pten/tests/core/allocator.h +++ b/paddle/pten/tests/core/allocator.h @@ -21,76 +21,19 @@ limitations under the License. */ namespace pten { namespace tests { -class HostAllocatorSample : public pten::deprecated::RawAllocator { +class FancyAllocator : public pten::Allocator { public: - using Place = paddle::platform::Place; - void* Allocate(size_t bytes_size) override { - return ::operator new(bytes_size); - } - void Deallocate(void* ptr, size_t bytes_size) override { - return ::operator delete(ptr); - } - const Place& place() const override { return place_; } - - private: - Place place_{paddle::platform::CPUPlace()}; -}; - -class FancyAllocator : public pten::deprecated::Allocator { - public: - using Allocation = pten::deprecated::Allocation; static void Delete(Allocation* allocation) { ::operator delete(allocation->ptr()); } - Allocation Allocate(size_t bytes_size) override { + AllocationPtr Allocate(size_t bytes_size) override { void* data = ::operator new(bytes_size); - return Allocation(data, data, &Delete, place()); - } - - const paddle::platform::Place& place() override { return place_; } - - paddle::platform::Place place_ = paddle::platform::CPUPlace(); -}; - -template -struct CustomAllocator { - using value_type = T; - using Allocator = pten::deprecated::RawAllocator; - - explicit CustomAllocator(const std::shared_ptr& a) noexcept - : alloc_(a) {} - - CustomAllocator(const CustomAllocator&) noexcept = default; - T* allocate(std::size_t n) { - return static_cast(alloc_->Allocate(n * sizeof(T))); - } - void deallocate(T* p, std::size_t n) { - return alloc_->Deallocate(p, sizeof(T) * n); + auto* allocation = + new pten::Allocation(data, bytes_size, paddle::platform::CPUPlace()); + return AllocationPtr(allocation, Delete); } - - template - friend bool operator==(const CustomAllocator&, - const CustomAllocator&) noexcept; - template - friend bool operator!=(const CustomAllocator&, - const CustomAllocator&) noexcept; - - private: - std::shared_ptr alloc_; }; -template -inline bool operator==(const CustomAllocator& lhs, - const CustomAllocator& rhs) noexcept { - return &lhs.alloc_ == 
&rhs.alloc_; -} - -template -inline bool operator!=(const CustomAllocator& lhs, - const CustomAllocator& rhs) noexcept { - return &lhs.alloc_ != &rhs.alloc_; -} - } // namespace tests } // namespace pten diff --git a/paddle/pten/tests/core/test_allocator.cc b/paddle/pten/tests/core/test_allocator.cc deleted file mode 100644 index 94ba9a1e1b9a2..0000000000000 --- a/paddle/pten/tests/core/test_allocator.cc +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/generator.h" -#include "paddle/pten/tests/core/allocator.h" -#include "paddle/pten/tests/core/random.h" -#include "paddle/pten/tests/core/timer.h" - -namespace pten { -namespace tests { - -using RawAllocator = pten::deprecated::RawAllocator; -using Allocator = pten::deprecated::Allocator; -using Allocation = pten::deprecated::Allocation; - -template -bool host_allocator_test(size_t vector_size) { - std::vector src(vector_size); - std::generate(src.begin(), src.end(), make_generator(src)); - std::vector> dst( - src.begin(), - src.end(), - CustomAllocator(std::make_shared())); - return std::equal(src.begin(), src.end(), dst.begin()); -} - -TEST(raw_allocator, host) { - CHECK(host_allocator_test(1000)); - CHECK(host_allocator_test(1000)); - CHECK(host_allocator_test(1000)); -} - -class StorageRawAlloc { - public: - StorageRawAlloc(const std::shared_ptr& a, size_t size) - : alloc_(a) { - data_ = alloc_->Allocate(size); - } - ~StorageRawAlloc() { alloc_->Deallocate(data_, size); } - - private: - void* data_; - size_t size; - std::shared_ptr alloc_; -}; - -class StorageFancyAlloc { - public: - StorageFancyAlloc(const std::shared_ptr& a, size_t size) - : alloc_(a), allocation_(a->Allocate(size)) {} - - private: - std::shared_ptr alloc_; - Allocation allocation_; -}; - -TEST(benchmark, allocator) { - std::shared_ptr raw_allocator(new HostAllocatorSample); - std::shared_ptr fancy_allocator(new FancyAllocator); - const size_t cycles = 100; - Timer timer; - double t1{}, t2{}; - for (size_t i = 0; i < cycles; ++i) { - timer.tic(); - for (size_t i = 0; i < cycles; ++i) { - StorageRawAlloc(raw_allocator, i * 100); - } - t1 += timer.toc(); - timer.tic(); - for (size_t i = 0; i < cycles; ++i) { - StorageFancyAlloc(fancy_allocator, i * 100); - } - t2 += timer.toc(); - } - std::cout << "The cost of raw alloc is " << t1 << "ms.\n"; - std::cout << "The cost of fancy alloc with place is " << t2 << "ms.\n"; -} - -} // namespace tests -} // namespace pten diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc index 8277c0d8dadb7..8564969796c7e 100644 --- a/paddle/pten/tests/core/test_dense_tensor.cc +++ b/paddle/pten/tests/core/test_dense_tensor.cc @@ -75,7 +75,8 @@ TEST(dense_tensor, ctor) { const LoD lod{}; DenseTensorMeta meta(dtype, dims, layout, lod); - auto alloc = std::make_shared(); + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto* 
alloc = fancy_allocator.get(); auto check_dense_tensor = [](const DenseTensor& t, const DenseTensorMeta& m) -> bool { @@ -95,10 +96,6 @@ TEST(dense_tensor, ctor) { DenseTensor tensor_1(alloc, DenseTensorMeta(meta)); check_dense_tensor(tensor_0, meta); - - DenseTensor tensor_2(make_intrusive(alloc), meta); - CHECK_NOTNULL(tensor_2.mutable_data()); - check_dense_tensor(tensor_2, meta); } TEST(dense_tensor, resize) { @@ -108,7 +105,8 @@ TEST(dense_tensor, resize) { const LoD lod{}; DenseTensorMeta meta(dtype, dims, layout, lod); - auto alloc = std::make_shared(); + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto* alloc = fancy_allocator.get(); DenseTensor tensor_0(alloc, meta); CHECK_EQ(tensor_0.capacity(), 2u); @@ -125,7 +123,8 @@ TEST(dense_tensor, shallow_copy) { const LoD lod{}; DenseTensorMeta meta(dtype, dims, layout, lod); - auto alloc = std::make_shared(); + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto* alloc = fancy_allocator.get(); DenseTensor tensor_0(alloc, meta); DenseTensor tensor_1(tensor_0); diff --git a/paddle/pten/tests/core/test_storage.cc b/paddle/pten/tests/core/test_storage.cc deleted file mode 100644 index 69d1eae668c58..0000000000000 --- a/paddle/pten/tests/core/test_storage.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "gtest/gtest.h" - -#include "paddle/pten/core/storage.h" -#include "paddle/pten/tests/core/allocator.h" - -namespace pten { -namespace tests { - -TEST(host_storage, internal) { - // TODO(Shixiaowei02): Here we need to consider the case - // where the size is zero. - const size_t size{100}; - const auto a = std::make_shared(); - TensorStorage storage(a, size); - CHECK_EQ(storage.size(), size); - CHECK(paddle::platform::is_cpu_place(storage.place())); - CHECK(storage.OwnsMemory()); - CHECK(storage.allocator() == a); - storage.Realloc(size + 100); - CHECK_EQ(storage.size(), size + 100); -} - -} // namespace tests -} // namespace pten diff --git a/paddle/pten/tests/kernels/test_cast_dev_api.cc b/paddle/pten/tests/kernels/test_cast_dev_api.cc index cb45d827e3be9..90624adeb344e 100644 --- a/paddle/pten/tests/kernels/test_cast_dev_api.cc +++ b/paddle/pten/tests/kernels/test_cast_dev_api.cc @@ -31,9 +31,9 @@ using DDim = paddle::framework::DDim; TEST(DEV_API, cast) { // 1. create tensor - const auto alloc = std::make_shared( + const auto alloc = std::make_unique( paddle::platform::CPUPlace()); - pten::DenseTensor dense_x(alloc, + pten::DenseTensor dense_x(alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); diff --git a/paddle/pten/tests/kernels/test_conj_dev_api.cc b/paddle/pten/tests/kernels/test_conj_dev_api.cc index 3392626dc2ad3..789d55491f368 100644 --- a/paddle/pten/tests/kernels/test_conj_dev_api.cc +++ b/paddle/pten/tests/kernels/test_conj_dev_api.cc @@ -29,9 +29,9 @@ using DDim = paddle::framework::DDim; TEST(DEV_API, conj) { // 1. 
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::COMPLEX64,
                                                   framework::make_ddim({3, 4}),
                                                   pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_copy_dev_api.cc b/paddle/pten/tests/kernels/test_copy_dev_api.cc
index 3095c83d97c98..c4d8c37eb9e0f 100644
--- a/paddle/pten/tests/kernels/test_copy_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_copy_dev_api.cc
@@ -31,17 +31,17 @@ using DDim = paddle::framework::DDim;
 // in 'paddle/api'
 TEST(DEV_API, copy) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());
   auto dense_src = std::make_shared<pten::DenseTensor>(
-      alloc,
+      alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({2, 3}),
                             pten::DataLayout::NCHW));
   auto* dense_x_data = dense_src->mutable_data<float>();
 
   auto dense_dst = std::make_shared<pten::DenseTensor>(
-      alloc,
+      alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({2, 3}),
                             pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_creation_dev_api.cc b/paddle/pten/tests/kernels/test_creation_dev_api.cc
index 4d753f7d09b8e..169a77cf3436b 100644
--- a/paddle/pten/tests/kernels/test_creation_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_creation_dev_api.cc
@@ -50,9 +50,9 @@ TEST(DEV_API, empty) {
 
 TEST(DEV_API, empty_like) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 2}),
                                                   pten::DataLayout::NCHW));
@@ -105,9 +105,9 @@ TEST(DEV_API, full) {
 
 TEST(DEV_API, full_like) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 2}),
                                                   pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_dot_dev_api.cc b/paddle/pten/tests/kernels/test_dot_dev_api.cc
index 6e2166cb673bd..a5773b8aa9690 100644
--- a/paddle/pten/tests/kernels/test_dot_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_dot_dev_api.cc
@@ -29,15 +29,15 @@ using DDim = paddle::framework::DDim;
 
 TEST(DEV_API, dot) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 10}),
                                                   pten::DataLayout::NCHW));
   auto* dense_x_data = dense_x.mutable_data<float>();
 
-  pten::DenseTensor dense_y(alloc,
+  pten::DenseTensor dense_y(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 10}),
                                                   pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc
index bd09ecb770a5d..40998a8d57caa 100644
--- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc
@@ -29,15 +29,15 @@ using DDim = paddle::framework::DDim;
 
 TEST(DEV_API, add) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 10}),
                                                   pten::DataLayout::NCHW));
   auto* dense_x_data = dense_x.mutable_data<float>();
 
-  pten::DenseTensor dense_y(alloc,
+  pten::DenseTensor dense_y(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({10}),
                                                   pten::DataLayout::NCHW));
@@ -82,15 +82,15 @@ TEST(DEV_API, add) {
 
 TEST(DEV_API, subtract) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 10}),
                                                   pten::DataLayout::NCHW));
   auto* dense_x_data = dense_x.mutable_data<float>();
 
-  pten::DenseTensor dense_y(alloc,
+  pten::DenseTensor dense_y(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({10}),
                                                   pten::DataLayout::NCHW));
@@ -135,15 +135,15 @@ TEST(DEV_API, subtract) {
 
 TEST(DEV_API, divide) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 10}),
                                                   pten::DataLayout::NCHW));
   auto* dense_x_data = dense_x.mutable_data<float>();
 
-  pten::DenseTensor dense_y(alloc,
+  pten::DenseTensor dense_y(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({10}),
                                                   pten::DataLayout::NCHW));
@@ -188,15 +188,15 @@ TEST(DEV_API, divide) {
 
 TEST(DEV_API, multiply) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 10}),
                                                   pten::DataLayout::NCHW));
   auto* dense_x_data = dense_x.mutable_data<float>();
 
-  pten::DenseTensor dense_y(alloc,
+  pten::DenseTensor dense_y(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({10}),
                                                   pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_flatten_dev_api.cc b/paddle/pten/tests/kernels/test_flatten_dev_api.cc
index f18e5c050ba70..d66ff468fcf48 100644
--- a/paddle/pten/tests/kernels/test_flatten_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_flatten_dev_api.cc
@@ -39,10 +39,10 @@ using DDim = paddle::framework::DDim;
 
 TEST(DEV_API, flatten) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
   pten::DenseTensor dense_x(
-      alloc,
+      alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 2, 2, 3}),
                             pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_matmul_dev_api.cc b/paddle/pten/tests/kernels/test_matmul_dev_api.cc
index 7ac3d19554581..0c1338f195563 100644
--- a/paddle/pten/tests/kernels/test_matmul_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_matmul_dev_api.cc
@@ -29,16 +29,16 @@ using DDim = paddle::framework::DDim;
 
 TEST(DEV_API, dot) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  DenseTensor dense_x(alloc,
+  DenseTensor dense_x(alloc.get(),
                       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                             framework::make_ddim({3, 3}),
                                             pten::DataLayout::NCHW));
 
   auto* dense_x_data = dense_x.mutable_data<float>();
 
-  DenseTensor dense_y(alloc,
+  DenseTensor dense_y(alloc.get(),
                       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                             framework::make_ddim({3, 3}),
                                             pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_mean_dev_api.cc b/paddle/pten/tests/kernels/test_mean_dev_api.cc
index 4b254e7e6c1ac..98782fd5dae0b 100644
--- a/paddle/pten/tests/kernels/test_mean_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_mean_dev_api.cc
@@ -29,9 +29,9 @@ using DDim = paddle::framework::DDim;
 
 TEST(DEV_API, mean) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 4}),
                                                   pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_reshape_dev_api.cc b/paddle/pten/tests/kernels/test_reshape_dev_api.cc
index 0196e1c211004..02139d02de17e 100644
--- a/paddle/pten/tests/kernels/test_reshape_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_reshape_dev_api.cc
@@ -30,10 +30,10 @@ using DDim = paddle::framework::DDim;
 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(DEV_API, reshape) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
   pten::DenseTensor dense_x(
-      alloc,
+      alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 2, 2, 3}),
                             pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_scale_dev_api.cc b/paddle/pten/tests/kernels/test_scale_dev_api.cc
index fe26f56552b05..02f324deb4cec 100644
--- a/paddle/pten/tests/kernels/test_scale_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_scale_dev_api.cc
@@ -29,9 +29,9 @@ using DDim = paddle::framework::DDim;
 
 TEST(DEV_API, scale) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 4}),
                                                   pten::DataLayout::NCHW));
@@ -69,9 +69,9 @@ TEST(DEV_API, scale) {
 
 TEST(DEV_API, scale_host) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 4}),
                                                   pten::DataLayout::NCHW));
@@ -79,9 +79,8 @@ TEST(DEV_API, scale_host) {
   for (size_t i = 0; i < 12; ++i) {
     dense_x_data[i] = i * 1.0;
   }
-  const auto alloc2 = std::make_shared<paddle::experimental::DefaultAllocator>(
-      paddle::platform::CPUPlace());
-  pten::DenseTensor scale(alloc2,
+
+  pten::DenseTensor scale(alloc.get(),
                           pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                 framework::make_ddim({1}),
                                                 pten::DataLayout::NCHW));
diff --git a/paddle/pten/tests/kernels/test_sum_dev_api.cc b/paddle/pten/tests/kernels/test_sum_dev_api.cc
index afaf903063781..312a6ce6100bb 100644
--- a/paddle/pten/tests/kernels/test_sum_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_sum_dev_api.cc
@@ -29,9 +29,9 @@ using DDim = paddle::framework::DDim;
 
 TEST(DEV_API, sum) {
   // 1. create tensor
-  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
-  pten::DenseTensor dense_x(alloc,
+  pten::DenseTensor dense_x(alloc.get(),
                             pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                   framework::make_ddim({3, 4}),
                                                   pten::DataLayout::NCHW));
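Note on the pattern above: every kernel test now keeps the only owning reference to the allocator (std::make_unique instead of std::make_shared) and hands the tensor a raw, non-owning pointer via alloc.get(). The following is a minimal self-contained sketch of that ownership convention; Allocator, HostAllocator, and Tensor are hypothetical stand-ins for the pten types, not the real API.

#include <cstddef>
#include <iostream>
#include <memory>

// Stand-in for an abstract allocator interface (illustrative only).
class Allocator {
 public:
  virtual ~Allocator() = default;
  virtual void* Allocate(std::size_t bytes) = 0;
};

// Stand-in for a concrete host allocator.
class HostAllocator : public Allocator {
 public:
  void* Allocate(std::size_t bytes) override { return ::operator new(bytes); }
};

// Stand-in for DenseTensor: it borrows the allocator and never owns it,
// so the allocator must outlive every tensor constructed from it.
class Tensor {
 public:
  explicit Tensor(Allocator* a) : alloc_(a) {}
  void* Reserve(std::size_t bytes) { return alloc_->Allocate(bytes); }

 private:
  Allocator* alloc_;  // non-owning
};

int main() {
  // The enclosing scope owns the allocator ...
  const auto alloc = std::make_unique<HostAllocator>();
  // ... and each tensor receives a raw pointer, mirroring alloc.get() above.
  Tensor t(alloc.get());
  void* buffer = t.Reserve(64);
  std::cout << std::boolalpha << (buffer != nullptr) << std::endl;
  ::operator delete(buffer);
  return 0;
}

The practical effect of the migration is visible in the sketch: constructing a tensor no longer bumps a shared_ptr reference count, and the allocator's lifetime is guaranteed lexically by the enclosing test scope rather than by reference counting.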