Merge branch 'main' into pr/8389

halide · Aug 23, 2024 · commit 5bf7f9b
2 parents: 66cdfbf + 45518ac

Showing 67 changed files with 210 additions and 188 deletions.
diff --git a/cmake/BundleStatic.cmake b/cmake/BundleStatic.cmake
@@ -51,11 +51,14 @@ function(bundle_static TARGET)
     # Repeatedly expand and flatten: T ~> T, T.INTERFACE_LINK_LIBRARIES
     foreach (i RANGE 5)
         _bundle_static_replace(
-            cmd "(.+)" "\\1;$<$<TARGET_EXISTS:\\1>:$<TARGET_PROPERTY:\\1,INTERFACE_LINK_LIBRARIES>>"
+            cmd "(.+)" "$<$<TARGET_EXISTS:\\1>:\\1;$<TARGET_PROPERTY:\\1,INTERFACE_LINK_LIBRARIES>>"
         )
         set(cmd "$<LIST:REMOVE_DUPLICATES,$<GENEX_EVAL:${cmd}>>")
     endforeach ()
 
+    # Ensure we are only including targets
+    _bundle_static_replace(cmd "(.+)" "$<TARGET_NAME_IF_EXISTS:\\1>")
+
     # Rewrite T ~> T^T.TYPE  -- we use ^ as a delimiter
     _bundle_static_replace(cmd "(.+)" "\\1^$<TARGET_PROPERTY:\\1,TYPE>")
     set(cmd "$<GENEX_EVAL:${cmd}>")

diff --git a/python_bindings/src/halide/halide_/PyRDom.cpp b/python_bindings/src/halide/halide_/PyRDom.cpp
@@ -5,6 +5,7 @@
 namespace Halide {
 namespace PythonBindings {
 
+namespace {
 void define_rvar(py::module &m) {
     auto rvar_class =
         py::class_<RVar>(m, "RVar")
@@ -24,6 +25,7 @@ void define_rvar(py::module &m) {
 
     add_binary_operators(rvar_class);
 }
+}  // namespace
 
 void define_rdom(py::module &m) {
     define_rvar(m);

diff --git a/src/AddImageChecks.cpp b/src/AddImageChecks.cpp
@@ -526,7 +526,7 @@ Stmt add_image_checks_inner(Stmt s,
                         << "as the first output buffer.\n";
 
                     stride_constrained = param.stride_constraint(i);
-                } else if (image.defined() && (int)i < image.dimensions()) {
+                } else if (image.defined() && i < image.dimensions()) {
                     stride_constrained = image.dim(i).stride();
                 }
 
@@ -543,7 +543,7 @@ Stmt add_image_checks_inner(Stmt s,
                 } else {
                     extent_constrained = Variable::make(Int(32), extent0_name);
                 }
-            } else if (image.defined() && (int)i < image.dimensions()) {
+            } else if (image.defined() && i < image.dimensions()) {
                 stride_constrained = image.dim(i).stride();
                 extent_constrained = image.dim(i).extent();
                 min_constrained = image.dim(i).min();

diff --git a/src/BoundaryConditions.cpp b/src/BoundaryConditions.cpp
@@ -14,7 +14,7 @@ Func repeat_edge(const Func &source,
 
     std::vector<Expr> actuals;
     for (size_t i = 0; i < bounds.size(); i++) {
-        Var arg_var = args[i];
+        const Var &arg_var = args[i];
         Expr min = bounds[i].min;
         Expr extent = bounds[i].extent;
 
@@ -39,16 +39,15 @@ Func repeat_edge(const Func &source,
 
 Func constant_exterior(const Func &source, const Tuple &value,
                        const Region &bounds) {
-    std::vector<Var> source_args = source.args();
-    std::vector<Var> args(source_args);
+    std::vector<Var> args(source.args());
     user_assert(args.size() >= bounds.size())
         << "constant_exterior called with more bounds (" << bounds.size()
-        << ") than dimensions (" << source_args.size()
+        << ") than dimensions (" << args.size()
         << ") Func " << source.name() << " has.\n";
 
     Expr out_of_bounds = cast<bool>(false);
     for (size_t i = 0; i < bounds.size(); i++) {
-        Var arg_var = source_args[i];
+        const Var &arg_var = args[i];
         Expr min = bounds[i].min;
         Expr extent = bounds[i].extent;
 
@@ -91,7 +90,7 @@ Func repeat_image(const Func &source,
 
     std::vector<Expr> actuals;
     for (size_t i = 0; i < bounds.size(); i++) {
-        Var arg_var = args[i];
+        const Var &arg_var = args[i];
         Expr min = bounds[i].min;
         Expr extent = bounds[i].extent;
 
@@ -146,7 +145,7 @@ Func mirror_image(const Func &source,
 
     std::vector<Expr> actuals;
     for (size_t i = 0; i < bounds.size(); i++) {
-        Var arg_var = args[i];
+        const Var &arg_var = args[i];
 
         Expr min = bounds[i].min;
         Expr extent = bounds[i].extent;
@@ -187,7 +186,7 @@ Func mirror_interior(const Func &source,
 
     std::vector<Expr> actuals;
     for (size_t i = 0; i < bounds.size(); i++) {
-        Var arg_var = args[i];
+        const Var &arg_var = args[i];
 
         Expr min = bounds[i].min;
         Expr extent = bounds[i].extent;

diff --git a/src/Bounds.cpp b/src/Bounds.cpp
@@ -1857,12 +1857,6 @@ Interval bounds_of_expr_in_scope_with_indent(const Expr &expr, const Scope<Inter
     return b.interval;
 }
 
-}  // namespace
-
-Interval bounds_of_expr_in_scope(const Expr &expr, const Scope<Interval> &scope, const FuncValueBounds &fb, bool const_bound) {
-    return bounds_of_expr_in_scope_with_indent(expr, scope, fb, const_bound, 0);
-}
-
 Region region_union(const Region &a, const Region &b) {
     internal_assert(a.size() == b.size()) << "Mismatched dimensionality in region union\n";
     Region result;
@@ -1877,6 +1871,12 @@ Region region_union(const Region &a, const Region &b) {
     return result;
 }
 
+}  // namespace
+
+Interval bounds_of_expr_in_scope(const Expr &expr, const Scope<Interval> &scope, const FuncValueBounds &fb, bool const_bound) {
+    return bounds_of_expr_in_scope_with_indent(expr, scope, fb, const_bound, 0);
+}
+
 void merge_boxes(Box &a, const Box &b) {
     if (b.empty()) {
         return;
@@ -3085,8 +3085,6 @@ class BoxesTouched : public IRGraphVisitor {
     }
 };
 
-}  // namespace
-
 map<string, Box> boxes_touched(const Expr &e, Stmt s, bool consider_calls, bool consider_provides,
                                const string &fn, const Scope<Interval> &scope, const FuncValueBounds &fb) {
     if (!fn.empty() && s.defined()) {
@@ -3275,6 +3273,7 @@ Box box_touched(const Expr &e, Stmt s, bool consider_calls, bool consider_provid
     internal_assert(boxes.size() <= 1);
     return boxes[fn];
 }
+}  // namespace
 
 map<string, Box> boxes_required(const Expr &e, const Scope<Interval> &scope, const FuncValueBounds &fb) {
     return boxes_touched(e, Stmt(), true, false, "", scope, fb);
@@ -3324,6 +3323,7 @@ Box box_touched(Stmt s, const string &fn, const Scope<Interval> &scope, const Fu
     return box_touched(Expr(), std::move(s), true, true, fn, scope, fb);
 }
 
+namespace {
 // Compute interval of all possible function's values (default + specialized values)
 Interval compute_pure_function_definition_value_bounds(
     const Definition &def, const Scope<Interval> &scope, const FuncValueBounds &fb, int dim) {
@@ -3338,14 +3338,15 @@ Interval compute_pure_function_definition_value_bounds(
     }
     return result;
 }
+}  // namespace
 
 FuncValueBounds compute_function_value_bounds(const vector<string> &order,
                                               const map<string, Function> &env) {
     FuncValueBounds fb;
 
     for (const auto &func_name : order) {
         Function f = env.find(func_name)->second;
-        const vector<string> f_args = f.args();
+        const vector<string> &f_args = f.args();
         for (int j = 0; j < f.outputs(); j++) {
             pair<string, int> key = {f.name(), j};
 

diff --git a/src/CPlusPlusMangle.cpp b/src/CPlusPlusMangle.cpp
@@ -44,8 +44,6 @@ Type non_null_void_star_type() {
     return Handle(1, &t);
 }
 
-}  // namespace
-
 namespace WindowsMangling {
 
 struct PreviousDeclarations {
@@ -615,6 +613,8 @@ std::string cplusplus_function_mangled_name(const std::string &name, const std::
 
 }  // namespace ItaniumABIMangling
 
+}  // namespace
+
 std::string cplusplus_function_mangled_name(const std::string &name, const std::vector<std::string> &namespaces,
                                             Type return_type, const std::vector<ExternFuncArgument> &args,
                                             const Target &target) {

diff --git a/src/CodeGen_C.cpp b/src/CodeGen_C.cpp
@@ -797,7 +797,7 @@ void CodeGen_C::emit_metadata_getter(const std::string &function_name,
         stream << get_indent() << kind_names[arg.kind] << ",\n";
         stream << get_indent() << (int)arg.dimensions << ",\n";
         internal_assert(arg.type.code() < sizeof(type_code_names) / sizeof(type_code_names[0]));
-        stream << get_indent() << "{" << type_code_names[arg.type.code()] << ", " << (int)arg.type.bits() << ", " << (int)arg.type.lanes() << "},\n";
+        stream << get_indent() << "{" << type_code_names[arg.type.code()] << ", " << arg.type.bits() << ", " << arg.type.lanes() << "},\n";
         stream << get_indent() << "scalar_def_" << legalized_name << ",\n";
         stream << get_indent() << "scalar_min_" << legalized_name << ",\n";
         stream << get_indent() << "scalar_max_" << legalized_name << ",\n";
@@ -873,8 +873,8 @@ void CodeGen_C::emit_constexpr_function_info(const std::string &function_name,
         const auto name = map_name(arg.name);
 
         stream << get_indent() << "{\"" << name << "\", " << kind_names[arg.kind] << ", " << (int)arg.dimensions
-               << ", halide_type_t{" << type_code_names[arg.type.code()] << ", " << (int)arg.type.bits()
-               << ", " << (int)arg.type.lanes() << "}},\n";
+               << ", halide_type_t{" << type_code_names[arg.type.code()] << ", " << arg.type.bits()
+               << ", " << arg.type.lanes() << "}},\n";
     }
     indent -= 1;
     stream << get_indent() << "}};\n";

diff --git a/src/CodeGen_D3D12Compute_Dev.cpp b/src/CodeGen_D3D12Compute_Dev.cpp
@@ -958,7 +958,7 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::visit(const FloatImm *op)
     // have seen division-by-zero shader warnings, and we postulated that it
     // could be indirectly related to compiler assumptions on signed integer
     // overflow when float_from_bits() is called, but we don't know for sure
-    return CodeGen_GPU_C::visit(op);
+    CodeGen_GPU_C::visit(op);
 }
 
 void CodeGen_D3D12Compute_Dev::add_kernel(Stmt s,
@@ -1146,10 +1146,12 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::add_kernel(Stmt s,
         using IRVisitor::visit;
         void visit(const For *loop) override {
             if (!is_gpu(loop->for_type)) {
-                return loop->body.accept(this);
+                loop->body.accept(this);
+                return;
             }
             if (loop->for_type != ForType::GPUThread) {
-                return loop->body.accept(this);
+                loop->body.accept(this);
+                return;
             }
             internal_assert(is_const_zero(loop->min));
             int index = thread_loop_workgroup_index(loop->name);

diff --git a/src/CodeGen_Hexagon.cpp b/src/CodeGen_Hexagon.cpp
@@ -1044,7 +1044,7 @@ Value *CodeGen_Hexagon::interleave_vectors(const vector<llvm::Value *> &v) {
             // Break them into native vectors, use vshuffvdd, and
             // concatenate the shuffled results.
             llvm::Type *native2_ty = get_vector_type(element_ty, native_elements * 2);
-            Value *bytes = codegen(-static_cast<int>(element_bits / 8));
+            Value *bytes = codegen(-(element_bits / 8));
             vector<Value *> ret;
             for (int i = 0; i < result_elements / 2; i += native_elements) {
                 Value *a_i = slice_vector(a, i, native_elements);
@@ -1147,7 +1147,7 @@ Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b,
     llvm::Type *b_ty = b->getType();
     internal_assert(a_ty == b_ty);
 
-    int a_elements = static_cast<int>(get_vector_num_elements(a_ty));
+    int a_elements = get_vector_num_elements(a_ty);
 
     llvm::Type *element_ty = get_vector_element_type(a->getType());
     internal_assert(element_ty);

diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp
@@ -292,6 +292,7 @@ Expr lower_int_uint_mod(const Expr &a, const Expr &b) {
     }
 }
 
+namespace {
 std::pair<Expr, Expr> unsigned_long_div_mod_round_to_zero(Expr &num, const Expr &den,
                                                           const uint64_t *upper_bound) {
     internal_assert(num.type() == den.type());
@@ -329,6 +330,7 @@ std::pair<Expr, Expr> unsigned_long_div_mod_round_to_zero(Expr &num, const Expr
     }
     return {q, r};
 }
+}  // namespace
 
 std::pair<Expr, Expr> long_div_mod_round_to_zero(const Expr &num, const Expr &den,
                                                  const uint64_t *max_abs) {
@@ -557,6 +559,7 @@ Expr lower_round_to_nearest_ties_to_even(const Expr &x) {
     return common_subexpression_elimination(a - correction);
 }
 
+namespace {
 bool get_md_bool(llvm::Metadata *value, bool &result) {
     if (!value) {
         return false;
@@ -585,6 +588,7 @@ bool get_md_string(llvm::Metadata *value, std::string &result) {
     }
     return false;
 }
+}  // namespace
 
 void get_target_options(const llvm::Module &module, llvm::TargetOptions &options) {
     bool use_soft_float_abi = false;

diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp
@@ -570,7 +570,7 @@ std::unique_ptr<llvm::Module> CodeGen_LLVM::compile(const Module &input) {
     // Define all functions
     int idx = 0;
     for (const auto &f : input.functions()) {
-        const auto names = function_names[idx++];
+        const auto &names = function_names[idx++];
 
         run_with_large_stack([&]() {
             compile_func(f, names.simple_name, names.extern_name);
@@ -1910,7 +1910,7 @@ Value *CodeGen_LLVM::codegen_buffer_pointer(const string &buffer, Halide::Type t
 
 Value *CodeGen_LLVM::codegen_buffer_pointer(Value *base_address, Halide::Type type, Expr index) {
     // Promote index to 64-bit on targets that use 64-bit pointers.
-    llvm::DataLayout d(module.get());
+    const llvm::DataLayout &d = module->getDataLayout();
     if (promote_indices() && d.getPointerSize() == 8) {
         index = promote_64(index);
     }
@@ -1951,7 +1951,7 @@ Value *CodeGen_LLVM::codegen_buffer_pointer(Value *base_address, Halide::Type ty
     }
 
     // Promote index to 64-bit on targets that use 64-bit pointers.
-    llvm::DataLayout d(module.get());
+    const llvm::DataLayout &d = module->getDataLayout();
     if (d.getPointerSize() == 8) {
         llvm::Type *index_type = index->getType();
         llvm::Type *desired_index_type = i64_t;
@@ -3228,7 +3228,7 @@ void CodeGen_LLVM::visit(const Call *op) {
         builder->SetInsertPoint(global_not_inited_bb);
         llvm::Value *selected_value = nullptr;
         for (int i = sub_fns.size() - 1; i >= 0; i--) {
-            const auto sub_fn = sub_fns[i];
+            const auto &sub_fn = sub_fns[i];
             if (!selected_value) {
                 selected_value = sub_fn.fn_ptr;
             } else {
@@ -3286,7 +3286,7 @@ void CodeGen_LLVM::visit(const Call *op) {
     } else if (op->is_intrinsic(Call::undef)) {
         user_error << "undef not eliminated before code generation. Please report this as a Halide bug.\n";
     } else if (op->is_intrinsic(Call::size_of_halide_buffer_t)) {
-        llvm::DataLayout d(module.get());
+        const llvm::DataLayout &d = module->getDataLayout();
         value = ConstantInt::get(i32_t, (int)d.getTypeAllocSize(halide_buffer_t_type));
     } else if (op->is_intrinsic(Call::strict_float)) {
         IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>::FastMathFlagGuard guard(*builder);
@@ -4466,7 +4466,7 @@ Value *CodeGen_LLVM::create_alloca_at_entry(llvm::Type *t, int n, bool zero_init
     Value *size = ConstantInt::get(i32_t, n);
     AllocaInst *ptr = builder->CreateAlloca(t, size, name);
     int align = native_vector_bits() / 8;
-    llvm::DataLayout d(module.get());
+    const llvm::DataLayout &d = module->getDataLayout();
     int allocated_size = n * (int)d.getTypeAllocSize(t);
     if (t->isVectorTy() || n > 1) {
         ptr->setAlignment(llvm::Align(align));

diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp
@@ -20,14 +20,6 @@
 
 #include <fstream>
 
-// This is declared in NVPTX.h, which is not exported. Ugly, but seems better than
-// hardcoding a path to the .h file.
-#ifdef WITH_NVPTX
-namespace llvm {
-FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
-}
-#endif
-
 namespace Halide {
 namespace Internal {
 

diff --git a/src/CodeGen_PowerPC.cpp b/src/CodeGen_PowerPC.cpp
@@ -129,7 +129,7 @@ void CodeGen_PowerPC::visit(const Min *op) {
             return;
         }
     }
-    return CodeGen_Posix::visit(op);
+    CodeGen_Posix::visit(op);
 }
 
 void CodeGen_PowerPC::visit(const Max *op) {
@@ -139,7 +139,7 @@ void CodeGen_PowerPC::visit(const Max *op) {
             return;
         }
     }
-    return CodeGen_Posix::visit(op);
+    CodeGen_Posix::visit(op);
 }
 
 string CodeGen_PowerPC::mcpu_target() const {