Merge pull request #1172 from boostorg/NVRTC_2

NVRTC Batch 2
boostorg · Aug 12, 2024 · 06d11fb · 06d11fb
2 parents 66becad + 687125c
commit 06d11fb
Show file tree

Hide file tree

Showing 18 changed files with 1,873 additions and 26 deletions.
diff --git a/include/boost/math/special_functions/cbrt.hpp b/include/boost/math/special_functions/cbrt.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,6 +11,8 @@
 #pragma once
 #endif
 
+#ifndef __CUDACC_RTC__
+
 #include <boost/math/tools/config.hpp>
 #include <boost/math/tools/rational.hpp>
 #include <boost/math/policies/error_handling.hpp>
@@ -171,6 +174,28 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type cbrt(T z)
 } // namespace math
 } // namespace boost
 
+#else
+
+namespace boost {
+namespace math {
+
+// Generic NVRTC cube root: hands the argument to the global-namespace ::cbrt
+// and converts whatever that returns back to T.
+template <typename T>
+__host__ __device__ T cbrt(T x)
+{
+   const T root = ::cbrt(x);
+   return root;
+}
+
+// float specialization: calls the single-precision ::cbrtf instead of the
+// ::cbrt used by the primary template.
+// Marked inline because an explicit specialization of a function template is
+// an ordinary function as far as the one-definition rule is concerned; without
+// it, including this header from more than one translation unit produces
+// duplicate definitions.
+template <>
+inline __host__ __device__ float cbrt(float x)
+{
+   return ::cbrtf(x);
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // NVRTC
+
 #endif // BOOST_MATH_SF_CBRT_HPP
 
 

diff --git a/include/boost/math/special_functions/fpclassify.hpp b/include/boost/math/special_functions/fpclassify.hpp
@@ -1,5 +1,6 @@
 //  Copyright John Maddock 2005-2008.
 //  Copyright (c) 2006-2008 Johan Rade
+//  Copyright (c) 2024 Matt Borland
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -11,13 +12,17 @@
 #pragma once
 #endif
 
-#include <limits>
-#include <type_traits>
-#include <cmath>
 #include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NVRTC_ENABLED
+
 #include <boost/math/tools/real_cast.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/detail/fp_traits.hpp>
+#include <limits>
+#include <type_traits>
+#include <cmath>
+
 /*!
   \file fpclassify.hpp
   \brief Classify floating-point value as normal, subnormal, zero, infinite, or NaN.
@@ -711,5 +716,82 @@ inline bool (isnan)(__float128 x)
 
 } // namespace math
 } // namespace boost
+
+#else // Special handling generally using the CUDA library
+
+#include <boost/math/tools/type_traits.hpp>
+
+namespace boost {
+namespace math {
+
+// isnan for integral types: an integer cannot hold a NaN, so the result is
+// always false and the argument is never inspected.
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isnan(T x)
+{
+   static_cast<void>(x);
+   return false;
+}
+
+// isnan for all non-integral types: defers to the global-namespace ::isnan.
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isnan(T x)
+{
+   const bool is_nan = ::isnan(x);
+   return is_nan;
+}
+
+// isinf for integral types: an integer cannot hold an infinity, so the result
+// is always false and the argument is never inspected.
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isinf(T x)
+{
+   static_cast<void>(x);
+   return false;
+}
+
+// isinf for all non-integral types: defers to the global-namespace ::isinf.
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isinf(T x)
+{
+   const bool is_inf = ::isinf(x);
+   return is_inf;
+}
+
+// isfinite for integral types: every integer value is finite, so the result
+// is always true and the argument is never inspected.
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isfinite(T x)
+{
+   static_cast<void>(x);
+   return true;
+}
+
+// isfinite for all non-integral types: defers to the global-namespace ::isfinite.
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isfinite(T x)
+{
+   const bool is_finite = ::isfinite(x);
+   return is_finite;
+}
+
+// Returns true when x is non-zero, not NaN and not infinite. No separate
+// subnormal test is made, so every other value is reported as normal.
+template <typename T>
+inline BOOST_MATH_GPU_ENABLED bool isnormal(T x)
+{
+   // One comparison against zero is sufficient: `-0` is an integer expression
+   // equal to 0, so static_cast<T>(-0) is the same value as static_cast<T>(0),
+   // and a floating-point negative zero compares equal to zero anyway.
+   return x != static_cast<T>(0) &&
+            !boost::math::isnan(x) &&
+            !boost::math::isinf(x);
+}
+
+// Classifies x, returning one of BOOST_MATH_FP_NAN, BOOST_MATH_FP_INFINITE,
+// BOOST_MATH_FP_ZERO or BOOST_MATH_FP_NORMAL. The checks run in that order and
+// the first one that matches wins.
+// We skip the check for FP_SUBNORMAL since they are not supported on these platforms
+template <typename T>
+inline BOOST_MATH_GPU_ENABLED int fpclassify(T x)
+{
+   if (boost::math::isnan(x))
+   {
+      return BOOST_MATH_FP_NAN;
+   }
+   else if (boost::math::isinf(x))
+   {
+      return BOOST_MATH_FP_INFINITE;
+   }
+   // A single comparison covers both zeros: `-0` is an integer expression
+   // equal to 0, so static_cast<T>(-0) would be the same value as
+   // static_cast<T>(0), and a floating-point negative zero compares equal to
+   // zero anyway.
+   else if (x == static_cast<T>(0))
+   {
+      return BOOST_MATH_FP_ZERO;
+   }
+
+   // Anything left (including values that would otherwise be subnormal).
+   return BOOST_MATH_FP_NORMAL;
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_NVRTC_ENABLED
+
 #endif // BOOST_MATH_FPCLASSIFY_HPP
 
diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp
@@ -110,7 +110,7 @@ BOOST_MATH_GPU_ENABLED T sinpx(T z)
 // tgamma(z), with Lanczos support:
 //
 template <class T, class Policy, class Lanczos>
-BOOST_MATH_GPU_ENABLED T gamma_imp_final(T z, const Policy& pol, const Lanczos& l)
+BOOST_MATH_GPU_ENABLED T gamma_imp_final(T z, const Policy& pol, const Lanczos&)
 {
    BOOST_MATH_STD_USING
 
@@ -2284,18 +2284,29 @@ BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
 
 #else
 
+#include <boost/math/tools/config.hpp>
+
 namespace boost {
 namespace math {
 
-inline __host__ __device__ float tgamma(float x) { return ::tgammaf(x); }
-inline __host__ __device__ double tgamma(double x) { return ::tgamma(x); }
+// tgamma for float: delegates to the global-namespace ::tgammaf.
+inline BOOST_MATH_GPU_ENABLED float tgamma(float x)
+{
+   return ::tgammaf(x);
+}
+
+// tgamma for double: delegates to the global-namespace ::tgamma.
+inline BOOST_MATH_GPU_ENABLED double tgamma(double x)
+{
+   return ::tgamma(x);
+}
 
 template <typename T, typename Policy>
-inline __host__ __device__ T tgamma(T x, const Policy&)
+inline BOOST_MATH_GPU_ENABLED T tgamma(T x, const Policy&) // policy overload: the Policy argument is unnamed and never read; the call goes to the one-argument boost::math::tgamma
 {
    return boost::math::tgamma(x);
 }
 
+// lgamma for float: delegates to the global-namespace ::lgammaf.
+inline BOOST_MATH_GPU_ENABLED float lgamma(float x)
+{
+   return ::lgammaf(x);
+}
+
+// lgamma for double: delegates to the global-namespace ::lgamma.
+inline BOOST_MATH_GPU_ENABLED double lgamma(double x)
+{
+   return ::lgamma(x);
+}
+
+// Policy-taking lgamma: the Policy argument is unnamed and never read; the
+// work is done by the one-argument boost::math::lgamma.
+template <typename T, typename Policy>
+inline BOOST_MATH_GPU_ENABLED T lgamma(T x, const Policy&)
+{
+   const T log_gamma = boost::math::lgamma(x);
+   return log_gamma;
+}
+
 } // namespace math
 } // namespace boost
 

diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp
@@ -14,6 +14,8 @@
 #pragma once
 #endif
 
+#ifndef __CUDACC_RTC__
+
 #include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/detail/fp_traits.hpp>
@@ -192,6 +194,47 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args_permissive<T, U>::typ
 } // namespace math
 } // namespace boost
 
+#else // NVRTC alias versions
+
+#include <boost/math/tools/config.hpp>
+
+namespace boost {
+namespace math {
+
+// NVRTC alias: asks the global-namespace ::signbit about x and returns the
+// answer as an int.
+template <typename T>
+BOOST_MATH_GPU_ENABLED int signbit(T x)
+{
+    const int sign_flag = ::signbit(x);
+    return sign_flag;
+}
+
+// NVRTC alias: returns the arithmetic negation of x.
+template <typename T>
+BOOST_MATH_GPU_ENABLED T changesign(T x)
+{
+    const T negated = -x;
+    return negated;
+}
+
+// Generic NVRTC copysign: passes both arguments to the global-namespace
+// ::copysign and converts the result to T.
+template <typename T>
+BOOST_MATH_GPU_ENABLED T copysign(T x, T y)
+{
+    const T combined = ::copysign(x, y);
+    return combined;
+}
+
+// float specialization: calls the single-precision ::copysignf instead of the
+// ::copysign used by the primary template.
+// Marked inline because an explicit specialization of a function template is
+// an ordinary function as far as the one-definition rule is concerned; without
+// it, including this header from more than one translation unit produces
+// duplicate definitions.
+template <>
+inline BOOST_MATH_GPU_ENABLED float copysign(float x, float y)
+{
+    return ::copysignf(x, y);
+}
+
+// Three-way sign of z, computed as an int and converted to T on return:
+// 0 when z compares equal to zero, -1 when ::signbit(z) is set, otherwise 1.
+template <typename T>
+BOOST_MATH_GPU_ENABLED T sign(T z)
+{
+    int result = 1;
+    if (z == 0)
+    {
+        result = 0;
+    }
+    else if (::signbit(z))
+    {
+        result = -1;
+    }
+    return result;
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // __CUDACC_RTC__
 
 #endif // BOOST_MATH_TOOLS_SIGN_HPP
 

diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp
@@ -11,6 +11,8 @@
 #pragma once
 #endif
 
+#ifndef __CUDACC_RTC__
+
 #include <boost/math/tools/is_standalone.hpp>
 
 // Minimum language standard transition
@@ -741,7 +743,7 @@ BOOST_MATH_GPU_ENABLED constexpr T cuda_safe_max(const T& a, const T& b) { retur
 // See if we can inline them instead
 
 #if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
-#  define BOOST_MATH_STATIC_CONSTEXPR inline constexpr
+#  define BOOST_MATH_INLINE_CONSTEXPR inline constexpr
 #  define BOOST_MATH_STATIC static
 #  ifndef BOOST_MATH_HAS_GPU_SUPPORT
 #    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
@@ -750,16 +752,55 @@ BOOST_MATH_GPU_ENABLED constexpr T cuda_safe_max(const T& a, const T& b) { retur
 #  endif
 #else
 #  ifndef BOOST_MATH_HAS_GPU_SUPPORT
-#    define BOOST_MATH_STATIC_CONSTEXPR static constexpr
+#    define BOOST_MATH_INLINE_CONSTEXPR static constexpr
 #    define BOOST_MATH_STATIC static
 #    define BOOST_MATH_STATIC_LOCAL_VARIABLE
 #  else
-#    define BOOST_MATH_STATIC_CONSTEXPR constexpr
+#    define BOOST_MATH_INLINE_CONSTEXPR constexpr
 #    define BOOST_MATH_STATIC constexpr
 #    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
 #  endif
 #endif
 
+#define BOOST_MATH_FP_NAN FP_NAN // outside NVRTC each BOOST_MATH_FP_* name simply expands to the matching FP_* macro
+#define BOOST_MATH_FP_INFINITE FP_INFINITE
+#define BOOST_MATH_FP_ZERO FP_ZERO
+#define BOOST_MATH_FP_SUBNORMAL FP_SUBNORMAL
+#define BOOST_MATH_FP_NORMAL FP_NORMAL
+
+#else // Special section for CUDA NVRTC to ensure we consume no headers
+
+#ifndef BOOST_MATH_STANDALONE
+#  define BOOST_MATH_STANDALONE // defined here on the NVRTC path unless the user already did so
+#endif
+
+#define BOOST_MATH_NVRTC_ENABLED // other headers (e.g. fpclassify.hpp) test this to pick their NVRTC code path
+#define BOOST_MATH_ENABLE_CUDA // tuple.hpp tests this macro with #ifdef
+#define BOOST_MATH_HAS_GPU_SUPPORT
+
+#define BOOST_MATH_GPU_ENABLED __host__ __device__ // CUDA qualifiers: function is compiled for both host and device
+
+// Exchanges a and b through a temporary copy: copy-construct from a, assign
+// b to a, then assign the saved copy to b.
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr void gpu_safe_swap(T& a, T& b)
+{
+   T saved(a);
+   a = b;
+   b = saved;
+}
+
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b) // expands to the gpu_safe_swap template defined above
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) (::min)(a, b) // global-namespace min; the parentheses keep a function-like `min` macro from expanding
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) (::max)(a, b) // global-namespace max; parenthesized for the same reason
+
+#define BOOST_MATH_FP_NAN 0 // plain integer codes: this branch is meant to consume no headers, so the FP_* macros are not used
+#define BOOST_MATH_FP_INFINITE 1
+#define BOOST_MATH_FP_ZERO 2
+#define BOOST_MATH_FP_SUBNORMAL 3
+#define BOOST_MATH_FP_NORMAL 4
+
+#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L // feature-test macro for inline variables
+#  define BOOST_MATH_INLINE_CONSTEXPR inline constexpr
+#else
+#  define BOOST_MATH_INLINE_CONSTEXPR constexpr // fallback when inline variables are unavailable
+#endif
+
+#endif // NVRTC
+
 #endif // BOOST_MATH_TOOLS_CONFIG_HPP
 
 

diff --git a/include/boost/math/tools/tuple.hpp b/include/boost/math/tools/tuple.hpp
@@ -8,8 +8,6 @@
 #define BOOST_MATH_TUPLE_HPP_INCLUDED
 
 #include <boost/math/tools/config.hpp>
-#include <boost/math/tools/cxx03_warn.hpp>
-#include <tuple>
 
 #ifdef BOOST_MATH_ENABLE_CUDA
 
@@ -36,6 +34,8 @@ using thrust::tuple_element;
 
 #else
 
+#include <tuple>
+
 namespace boost { 
 namespace math {