Merge branch 'develop' into fix1
co63oc committed Jan 23, 2024
2 parents 073b482 + 7e5e213 commit 5c8c98d
Showing 240 changed files with 3,649 additions and 1,321 deletions.
2 changes: 1 addition & 1 deletion paddle/cinn/auto_schedule/measure/measure.h
@@ -70,7 +70,7 @@ class ScheduleBuilder {
 };
 
 // This interface defines how to run the built result. Like above
-// ScheduleBuilder, a runner shoule be implemented with not bound to a specific
+// ScheduleBuilder, a runner should be implemented with not bound to a specific
 // task.
 class ScheduleRunner {
  public:
73 changes: 67 additions & 6 deletions paddle/cinn/backends/codegen_cuda_dev.cc
@@ -114,6 +114,31 @@ std::vector<Expr> CodeGenCUDA_Dev::GenerateBufferAliasExprs(
   return buffer_alias;
 }
 
+std::vector<Expr> FilterDeallocTempBuffers(const std::vector<Expr> &frees) {
+  std::vector<Expr> filtered;
+  for (const Expr &free : frees) {
+    const ir::Free *op = free.As<ir::Free>();
+    CHECK_NOTNULL(op);
+    bool has_symbolic_constant = false;
+    const ir::_Buffer_ *buffer = op->destination.As<ir::_Buffer_>();
+    for (Expr shape : buffer->shape) {
+      ir::ir_utils::CollectIRNodes(shape, [&](const Expr *x) {
+        if (x->as_var()) {
+          CHECK(x->as_var()->is_symbolic_constant)
+              << "var in buffer shape must be symbolic constant.";
+          has_symbolic_constant = true;
+        }
+        return false;
+      });
+    }
+    if (has_symbolic_constant &&
+        buffer->memory_type == ir::MemoryType::GPULocal) {
+      filtered.emplace_back(free);
+    }
+  }
+  return filtered;
+}
+
 void CodeGenCUDA_Dev::Visit(const ir::_LoweredFunc_ *op) {
   // clear names valid within scope when enter a new function
   vectorized_tensor_names_.clear();
@@ -129,6 +154,8 @@ void CodeGenCUDA_Dev::Visit(const ir::_LoweredFunc_ *op) {
   auto alloca_temp_buffers = op->PrepareAllocTempBufferExprs();
   auto temp_buffer_alias = GenerateBufferAliasExprs(op, op->temp_bufs);
   auto alis_var_exprs = op->CudaAliasVarExprs();
+  auto dealloc_temp_buffers =
+      FilterDeallocTempBuffers(op->PrepareDeallocTempBufferExprs());
 
 #define APPEND_TO_NEW_BODY(field__) \
   new_body.insert(std::end(new_body), std::begin(field__), std::end(field__));
@@ -137,6 +164,7 @@ void CodeGenCUDA_Dev::Visit(const ir::_LoweredFunc_ *op) {
   APPEND_TO_NEW_BODY(alis_var_exprs)
 
   new_body.push_back(op->body);
+  APPEND_TO_NEW_BODY(dealloc_temp_buffers);
 
   Expr func_body = ir::Block::Make(new_body);
 
@@ -148,6 +176,12 @@ void CodeGenCUDA_Dev::Visit(const ir::_LoweredFunc_ *op) {
   IrPrinter::Visit(func_body);
 }
 
+void CodeGenCUDA_Dev::Visit(const ir::Free *op) {
+  str_ += "delete [] ";
+  str_ += op->destination.As<ir::_Buffer_>()->name;
+  str_ += ";\n";
+}
+
 void CodeGenCUDA_Dev::Visit(const ir::_Var_ *op) {
   if (utils::Startswith(op->name, "threadIdx") ||
       utils::Startswith(op->name, "blockIdx")) {
@@ -258,6 +292,22 @@ void CodeGenCUDA_Dev::PrintIncludes() { str_ += GetSourceHeader(); }
 
 void CodeGenCUDA_Dev::PrintTempBufferCreation(const ir::Buffer &buffer) {
   CHECK_NE(buffer->type(), Void());
+  // Calculate buffer size and determine if it contains a symbolic constant
+  Expr buffer_size(1);
+  for (int i = 0; i < buffer->shape.size(); i++) {
+    buffer_size = buffer_size * buffer->shape[i];
+  }
+  optim::Simplify(&buffer_size);
+  bool has_symbolic_constant = false;
+  ir::ir_utils::CollectIRNodes(buffer_size, [&](const Expr *x) {
+    if (x->as_var()) {
+      CHECK(x->as_var()->is_symbolic_constant)
+          << "var in buffer size must be symbolic constant.";
+      has_symbolic_constant = true;
+    }
+    return false;
+  });
+  // print func of static allocation
   auto print_gpu_memory = [&](const std::string &mark) {
     str_ += mark;
     str_ += GetTypeRepr(buffer->dtype);
@@ -266,21 +316,32 @@ void CodeGenCUDA_Dev::PrintTempBufferCreation(const ir::Buffer &buffer) {
     str_ += " ";
 
     str_ += "[ ";
-    Expr buffer_size(1);
-    for (int i = 0; i < buffer->shape.size(); i++) {
-      buffer_size = buffer_size * buffer->shape[i];
-    }
-    optim::Simplify(&buffer_size);
     IrPrinter::Visit(buffer_size);
     str_ += " ]";
   };
+  // print func of dynamic allocation
+  auto print_gpu_local_memory_dynamic_allocation = [&]() {
+    str_ += GetTypeRepr(buffer->dtype);
+    str_ += " *";
+    str_ += buffer->name;
+    str_ += " = new ";
+    str_ += GetTypeRepr(buffer->dtype);
+    str_ += "[ ";
+    IrPrinter::Visit(buffer_size);
+    str_ += " ]";
+  };
+  // print
   switch (buffer->memory_type) {
     case ir::MemoryType::GPUShared:
       print_gpu_memory("__shared__ ");
      break;
 
     case ir::MemoryType::GPULocal:
-      print_gpu_memory("");
+      if (has_symbolic_constant) {
+        print_gpu_local_memory_dynamic_allocation();
+      } else {
+        print_gpu_memory("");
+      }
       break;
 
     default:
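Taken together, these changes mean a GPULocal temporary buffer whose shape contains a symbolic constant is now heap-allocated up front and freed after the function body, instead of being declared as a fixed-size array. A minimal sketch of the device code the two paths would print; the function name, dtype, and extents here are assumptions for illustration, not output captured from this commit:

// Sketch only: what PrintTempBufferCreation and the new Visit(ir::Free)
// would emit for static vs. symbolic-shape GPULocal temporaries.
__global__ void fn_example(const float *x, float *y, int64_t S0) {
  float _temp_static[ 32 ];                // static path: print_gpu_memory("")
  float *_temp_dyn = new float[ S0 * 4 ];  // dynamic path, symbolic extent S0
  // ... kernel body reads and writes the temporaries ...
  delete [] _temp_dyn;  // printed by Visit(const ir::Free *op) after the body
}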
2 changes: 2 additions & 0 deletions paddle/cinn/backends/codegen_cuda_dev.h
@@ -73,6 +73,7 @@ class CodeGenCUDA_Dev : public CodeGenC {
  protected:
   void Visit(const ir::_Var_* op) override;
   void Visit(const ir::_LoweredFunc_* op) override;
+  void Visit(const ir::Free* op) override;
   void Visit(const ir::Min* op) override;
   void Visit(const ir::Max* op) override;
   void Visit(const ir::Alloc* op) override;
@@ -113,6 +114,7 @@ class CodeGenCUDA_Dev : public CodeGenC {
   // prefix
   std::unordered_set<std::string> vectorized_tensor_names_;
   static const std::string source_header_;
+  std::vector<ir::Buffer> dynamic_alloc_buffers_;
 };
 
 }  // namespace backends
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_cuda_host.cc
@@ -222,7 +222,7 @@ llvm::Value* CodeGenCUDA_Host::LowerHostFunc(const ir::_LoweredFunc_* func) {
 
 llvm::Value* CodeGenCUDA_Host::LowerParseArgsValueCall(
     const ir::Call* call_ir) {
-  auto ret_type = CinnTypeToLLVMType(Int(32), m_);
+  auto ret_type = CinnTypeToLLVMType(Int(64), m_);
   std::vector<llvm::Type*> args_type;
   CHECK_EQ(call_ir->read_args.size(), 2);
   CHECK(call_ir->read_args[0].is_var() &&
7 changes: 4 additions & 3 deletions paddle/cinn/backends/codegen_cuda_util.cc
@@ -114,15 +114,16 @@ void detail::CollectBucketStrategyHostFunctionVisitor::ProcessArgs(
   for (int i = 0; i < args.size(); ++i) {
     if (args[i].is_var()) {
       ir::Expr call_get_value_in_kernel_args =
-          ir::Call::Make(Int(32),
+          ir::Call::Make(Int(64),
                          runtime::intrinsic::get_value_in_cuda_kernel_args,
                          {kernel_args_, ir::Expr(i)},
                          {},
                          ir::CallType::Extern,
                          ir::FunctionRef(),
                          0);
-      ir::Expr stmt = ir::Let::Make(ir::Expr(args[i].var_arg()),
-                                    call_get_value_in_kernel_args);
+      ir::Expr let_symbol = ir::Expr(args[i].var_arg());
+      let_symbol->set_type(type_of<int64_t>());
+      ir::Expr stmt = ir::Let::Make(let_symbol, call_get_value_in_kernel_args);
       arg_defs_.push_back(stmt);
     }
   }
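Together with the LowerParseArgsValueCall change above, symbolic-shape arguments now flow through the host stub as 64-bit values end to end. A rough host-side sketch of what the generated Call/Let pair lowers to; only the intrinsic's name comes from the diff (runtime::intrinsic::get_value_in_cuda_kernel_args), its C signature here is an assumption:

// Assumed signature, for illustration only.
extern "C" int64_t get_value_in_cuda_kernel_args(void *args, int i);

void fn_example_host(void *kernel_args) {
  // Each symbolic dimension is bound as int64_t: the Call's return type
  // and the Let symbol's type are both Int(64) after this change.
  int64_t S0 = get_value_in_cuda_kernel_args(kernel_args, 0);
  int64_t S1 = get_value_in_cuda_kernel_args(kernel_args, 1);
  // ... compute launch dimensions and buffer sizes from S0 / S1 ...
}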
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_cuda_util.h
@@ -152,7 +152,7 @@ struct CollectBucketStrategyHostFunctionVisitor
         kernel_args_(KERNEL_ARGS, type_of<void*>()),
         kernel_args_num_(KERNEL_ARGS_NUM, type_of<int>()),
         kernel_stream_(KERNEL_STREAM, type_of<void*>()),
-        tensor_shape_args_(TENSOR_SHAPE_ARGS, type_of<int32_t**>()) {}
+        tensor_shape_args_(TENSOR_SHAPE_ARGS, type_of<int64_t**>()) {}
 
   std::tuple<ir::Module, ir::Module> operator()(Expr* expr) {
     ir::IRMutator<>::Visit(expr, expr);
7 changes: 2 additions & 5 deletions paddle/cinn/common/broadcast_tree.cc
@@ -137,7 +137,8 @@ std::optional<symbol::Broadcastable<symbol::DimExpr>> GetFirstCstrBroadcastable(
     }
   }
   if (lhs_symbol.has_value() && rhs_symbol.has_value()) {
-    CHECK(lhs_symbol != rhs_symbol);
+    CHECK(lhs_symbol != rhs_symbol)
+        << lhs_symbol.value() << " != " << rhs_symbol.value();
     ret = symbol::Broadcastable<symbol::DimExpr>{lhs_symbol.value(),
                                                  rhs_symbol.value()};
     return true;
@@ -341,8 +342,4 @@ std::string ToTxtString(const BroadcastTree& tree) {
       tree.variant());
 }
 
-std::ostream& operator<<(std::ostream& os, const BroadcastTree& tree) {
-  os << ToTxtString(tree);
-}
-
 }  // namespace cinn::common
2 changes: 0 additions & 2 deletions paddle/cinn/common/broadcast_tree.h
@@ -33,6 +33,4 @@ BroadcastTree ConstructBroadcastTree(const BroadcastLeaf& leaves);
 
 std::string ToTxtString(const BroadcastTree&);
 
-std::ostream& operator<<(std::ostream& os, const BroadcastTree& tree);
-
 }  // namespace cinn::common
6 changes: 5 additions & 1 deletion paddle/cinn/common/dim_expr_converter.cc
@@ -28,7 +28,11 @@ struct DimExprToIrExprVisitor {
   ir::Expr operator()(const int64_t& dim) { return ir::Expr(dim); }
 
   ir::Expr operator()(const std::string& dim_expr) {
-    Var x = ir::_Var_::Make(dim_expr, Int(64));
+    Var x = ir::_Var_::Make(ir::Expr(static_cast<int64_t>(0)),
+                            ir::Expr(INT64_MAX),
+                            dim_expr,
+                            /* is_reduce = */ false,
+                            /* is_symbolic_constant = */ true);
     return x;
   }
 
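In effect, a string dimension no longer converts to a plain Int(64) Var but to a bounded symbolic constant, which is exactly the property FilterDeallocTempBuffers and PrintTempBufferCreation key on in codegen_cuda_dev.cc. A usage sketch mirroring the tests below; the namespaces and headers are assumptions:

// Illustrative fragment only.
symbol::DimExpr dim_expr("sym_0");
ir::Expr e = common::DimExprConverter().ConvertToIrExpr(dim_expr);
// e is a Var with range [0, INT64_MAX], is_reduce == false and
// is_symbolic_constant == true, not ir::_Var_::Make("sym_0", Int(64)).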
25 changes: 21 additions & 4 deletions paddle/cinn/common/dim_expr_converter_test.cc
@@ -31,16 +31,27 @@ TEST(Convert, AddExpr) {
 
   ir::Expr expr1 =
       ir::Add::Make(ir::Expr(std::int64_t(4)), ir::Expr(std::int64_t(5)));
-  ir::Expr dst_expr = ir::Add::Make(expr1, ir::_Var_::Make("sym_0", Int(64)));
+  ir::Expr dst_expr =
+      ir::Add::Make(expr1,
+                    ir::_Var_::Make(ir::Expr(static_cast<int64_t>(0)),
+                                    ir::Expr(INT64_MAX),
+                                    "sym_0",
+                                    /* is_reduce = */ false,
+                                    /* is_symbolic_constant = */ true));
   ASSERT_TRUE(MathEqual(src_expr, dst_expr));
 }
 
 TEST(Convert, SubExpr) {
   DimExpr dim_expr = DimExpr(4) - DimExpr("sym_0");
   ir::Expr src_expr = DimExprConverter().ConvertToIrExpr(dim_expr);
 
-  ir::Expr expr1 = ir::Sub::Make(ir::Expr(std::int64_t(0)),
-                                 ir::_Var_::Make("sym_0", Int(64)));
+  ir::Expr expr1 =
+      ir::Sub::Make(ir::Expr(std::int64_t(0)),
+                    ir::_Var_::Make(ir::Expr(static_cast<int64_t>(0)),
+                                    ir::Expr(INT64_MAX),
+                                    "sym_0",
+                                    /* is_reduce = */ false,
+                                    /* is_symbolic_constant = */ true));
   ir::Expr dst_expr = ir::Add::Make(ir::Expr(std::int64_t(4)), expr1);
   ASSERT_TRUE(MathEqual(src_expr, dst_expr));
 }
@@ -52,7 +63,13 @@ TEST(Convert, MulExpr) {
 
   ir::Expr expr1 =
       ir::Mul::Make(ir::Expr(std::int64_t(4)), ir::Expr(std::int64_t(5)));
-  ir::Expr dst_expr = ir::Mul::Make(expr1, ir::_Var_::Make("sym_0", Int(64)));
+  ir::Expr dst_expr =
+      ir::Mul::Make(expr1,
+                    ir::_Var_::Make(ir::Expr(static_cast<int64_t>(0)),
+                                    ir::Expr(INT64_MAX),
+                                    "sym_0",
+                                    /* is_reduce = */ false,
+                                    /* is_symbolic_constant = */ true));
   ASSERT_TRUE(MathEqual(src_expr, dst_expr));
 }
6 changes: 5 additions & 1 deletion paddle/cinn/common/ir_util.cc
@@ -143,8 +143,12 @@ Expr IndiceToAbsOffset(const std::vector<Expr> &shape,
   VLOG(3) << "indices is : " << utils::Join(indices, ",");
   CHECK_LE(shape.size(), indices.size());
   Expr res;
+  ir::TryElevateInt32ToInt64(shape);
   for (int i = 0; i < shape.size(); i++) {
-    CHECK_EQ(shape[i].type(), Int(32));
+    CHECK(shape[i].type() == Int(64) || shape[i].type() == Int(32))
+        << "The shape data type currently supports only int32 or int64, but "
+           "the current data type of shape["
+        << i << "] is " << shape[i].type();
     Expr indice_prod = indices[i];
     optim::SimplifyCast(&indice_prod);
     for (int j = i + 1; j < shape.size(); j++) {
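IndiceToAbsOffset builds the usual row-major flattening, now over int32 or int64 extents. A plain C++ sketch of the arithmetic it encodes symbolically; this is a hypothetical helper for illustration, the real function produces ir::Expr nodes:

#include <cstdint>
#include <vector>

// Row-major offset: ((i0 * s1 + i1) * s2 + i2) ... for shape [s0, s1, s2].
int64_t RowMajorOffset(const std::vector<int64_t> &shape,
                       const std::vector<int64_t> &indices) {
  int64_t offset = 0;
  for (size_t i = 0; i < shape.size(); ++i) {
    offset = offset * shape[i] + indices[i];
  }
  return offset;
}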
6 changes: 6 additions & 0 deletions paddle/cinn/common/type.h
@@ -263,6 +263,12 @@ inline Type type_of<int32_t**>() {
   return x;
 }
 template <>
+inline Type type_of<int64_t**>() {
+  Type x = Int(64);
+  x.set_cpp_handle2();
+  return x;
+}
+template <>
 inline Type type_of<void*>() {
   Type x = type_of<void>();
   x.set_cpp_handle();
2 changes: 1 addition & 1 deletion paddle/cinn/frontend/decomposer_registry.h
@@ -46,7 +46,7 @@ class DecomposerContext {
   }
   if (new_var->type != ori_var->type) {
     LOG(FATAL)
-        << "The output type shoule be equal to the original. But received : "
+        << "The output type should be equal to the original. But received : "
         << new_var->id << ".type=" << new_var->type
        << " and the original var " << ori_var->id
        << ".type=" << ori_var->type;