Skip to content

Commit

Permalink
Merge pull request #4 from bigno78/autotuning-dia
Browse files Browse the repository at this point in the history
Merge the final changes
  • Loading branch information
bigno78 committed May 15, 2023
2 parents 6847393 + b65e416 commit fe66995
Show file tree
Hide file tree
Showing 27 changed files with 2,680 additions and 587 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,4 @@ performance/spmm/spmm
*.user

.vscode
a.out
6 changes: 3 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
nvcc -DCUSP_PATH=$(realpath .) \
-I . -I ../KTT/Source \
-l cuda -l ktt -L ../KTT/Build/x86_64_Debug/ \
--linker-options=-rpath,$(realpath ../KTT/Build/x86_64_Debug/) \
-std=c++17 -g -O3 -DKTT_LINE_INFO -lineinfo main.cu
-l cuda -l ktt -L ../KTT/Build/x86_64_Release/ \
--linker-options=-rpath,$(realpath ../KTT/Build/x86_64_Release/) \
-std=c++17 -g -O3 -lineinfo main.cu

[ "$?" -eq "0" ] || exit 1

Expand Down
6 changes: 5 additions & 1 deletion build/build-env.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,10 @@ def addKTT(env):
env.Append(RPATH = [ ktt_lib_path ])
env.Append(LIBS = [ "ktt" ])

if env['mode'] == 'debug':
env.Append(CFLAGS = ['-DKTT_LINE_INFO'])
env.Append(CXXFLAGS = ['-DKTT_LINE_INFO'])


def Environment(buildDir):
# allow the user discretion to choose the MSVC version
Expand Down Expand Up @@ -465,7 +469,7 @@ def Environment(buildDir):
env['ENV']['DYLD_LIBRARY_PATH'] = os.environ['DYLD_LIBRARY_PATH']
elif 'LD_LIBRARY_PATH' in os.environ:
env['ENV']['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH']

addKTT(env)

# generate help text
Expand Down
2 changes: 1 addition & 1 deletion cusp/detail/temporary_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include <thrust/detail/temporary_array.h>

#if THRUST_VERSION >= 100800
#define TEMP_HOST_DEVICE_DECORATORS __host__ __device__
#define TEMP_HOST_DEVICE_DECORATORS __host__
#else
#define TEMP_HOST_DEVICE_DECORATORS
#endif
Expand Down
137 changes: 137 additions & 0 deletions cusp/ktt/detail/ellr_matrix.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#include <cusp/array2d.h>
#include <cusp/convert.h>
#include <cusp/detail/utils.h>

#include <thrust/transform.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/device_ptr.h> // raw_pointer_cast

namespace cusp
{

namespace ktt
{


// Unary functor that measures how many valid entries one row of an
// ELL-style column index array holds.
//
// The array is addressed as column_indices[row + k*pitch]: slot k of a row
// lives k*pitch elements after that row's first slot. A row is considered
// finished at its first negative column index, or after num_cols_per_row
// slots, whichever comes first.
template<typename IndexType>
struct ell_row_length
{
    const IndexType* column_indices;  // first element of the index array
    size_t num_cols_per_row;          // maximum number of slots scanned per row
    size_t pitch;                     // element stride between consecutive slots of a row

    // Returns the number of leading slots of row `row_idx` that hold a
    // non-negative column index.
    //
    // The call operator is const because it never modifies the functor, so
    // it can also be invoked through a const copy or a const reference.
    __host__ __device__ IndexType operator()(IndexType row_idx) const
    {
        // Count in size_t so that neither the bound check nor the offset
        // arithmetic mixes signed and unsigned operands.
        const size_t row = static_cast<size_t>(row_idx);
        size_t len = 0;

        while (len < num_cols_per_row
               && column_indices[row + len*pitch] >= 0)
        {
            ++len;
        }

        return static_cast<IndexType>(len);
    }
};

// Writes one value per row of A into A.row_lengths by applying
// ell_row_length to every row index in [0, A.num_rows).
//
// The output is written through A.row_lengths.begin(); this function does
// not resize A.row_lengths, so the caller is expected to have sized it.
template<typename IndexType, typename ValueType, typename MemorySpace>
void
compute_row_lengths(cusp::ktt::ellr_matrix<IndexType, ValueType, MemorySpace>& A)
{
    // Functor that reads A's column index array in place.
    ell_row_length<IndexType> row_length{
        thrust::raw_pointer_cast(&A.column_indices(0, 0)),
        A.column_indices.num_cols,
        A.column_indices.pitch };

    // Half-open range of row indices to evaluate.
    thrust::counting_iterator<IndexType> first_row(0);
    thrust::counting_iterator<IndexType> last_row = first_row + A.num_rows;

    thrust::transform(first_row, last_row, A.row_lengths.begin(), row_length);
}


//////////////////
// Constructors //
//////////////////

// Sized constructor: forwards every argument to Parent, then gives
// row_lengths one element per row.
template <typename IndexType, typename ValueType, class MemorySpace>
ellr_matrix<IndexType, ValueType, MemorySpace>
::ellr_matrix(const size_t num_rows,
              const size_t num_cols,
              const size_t num_entries,
              const size_t num_entries_per_row,
              const size_t alignment)
    : Parent(num_rows, num_cols, num_entries, num_entries_per_row, alignment)
{
    this->row_lengths.resize(num_rows);
}

// Converting constructor: fills this matrix from `matrix` through
// cusp::convert, then recomputes row_lengths for the converted data.
template <typename IndexType, typename ValueType, class MemorySpace>
template <typename MatrixType>
ellr_matrix<IndexType, ValueType, MemorySpace>
::ellr_matrix(const MatrixType& matrix)
{
    ellr_matrix& self = *this;

    cusp::convert(matrix, self);
    self.update_row_lengths();
}

//////////////////////
// Member Functions //
//////////////////////

// Exchanges the contents of this matrix and `matrix`: the Parent part
// first, then the row_lengths arrays.
template <typename IndexType, typename ValueType, class MemorySpace>
void
ellr_matrix<IndexType, ValueType, MemorySpace>
::swap(ellr_matrix& matrix)
{
    this->Parent::swap(matrix);
    this->row_lengths.swap(matrix.row_lengths);
}

// Resizes the Parent storage with the given dimensions and keeps
// row_lengths at one element per row.
template <typename IndexType, typename ValueType, class MemorySpace>
void
ellr_matrix<IndexType, ValueType, MemorySpace>
::resize(const size_t num_rows,
         const size_t num_cols,
         const size_t num_entries,
         const size_t num_entries_per_row)
{
    this->Parent::resize(num_rows, num_cols, num_entries, num_entries_per_row);
    this->row_lengths.resize(num_rows);
}

// Same as the four-argument resize, but additionally forwards `alignment`
// to Parent::resize. row_lengths ends up with one element per row.
template <typename IndexType, typename ValueType, class MemorySpace>
void
ellr_matrix<IndexType, ValueType, MemorySpace>
::resize(const size_t num_rows,
         const size_t num_cols,
         const size_t num_entries,
         const size_t num_entries_per_row,
         const size_t alignment)
{
    this->Parent::resize(
        num_rows, num_cols, num_entries, num_entries_per_row, alignment);
    this->row_lengths.resize(num_rows);
}

// Assignment from another matrix type: converts `matrix` into this object
// through cusp::convert, recomputes row_lengths, and returns *this.
template <typename IndexType, typename ValueType, class MemorySpace>
template <typename MatrixType>
ellr_matrix<IndexType, ValueType, MemorySpace>&
ellr_matrix<IndexType, ValueType, MemorySpace>
::operator=(const MatrixType& matrix)
{
    ellr_matrix& self = *this;

    cusp::convert(matrix, self);
    self.update_row_lengths();

    return self;
}

// Recomputes row_lengths from the matrix's current contents.
template <typename IndexType, typename ValueType, class MemorySpace>
void ellr_matrix<IndexType, ValueType, MemorySpace>::update_row_lengths()
{
    const auto row_count = this->num_rows;

    // Ensure there is exactly one slot per row before the lengths are written.
    if (this->row_lengths.size() != row_count)
    {
        this->row_lengths.resize(row_count);
    }

    compute_row_lengths(*this);
}


} // namespace ktt

} // namespace cusp
74 changes: 34 additions & 40 deletions cusp/ktt/detail/ktt.inl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace ktt {

namespace detail {


inline std::unique_ptr<::ktt::Tuner> tuner;
inline bool is_enabled = true;

Expand All @@ -41,17 +42,18 @@ inline void lazy_init()
CUstream stream;
cuStreamCreate(&stream, CU_STREAM_DEFAULT);

::ktt::ComputeApiInitializer initializer(context, std::vector<::ktt::ComputeQueue>{ stream });
std::vector<::ktt::ComputeQueue> queues = { stream };
::ktt::ComputeApiInitializer initializer(context, queues);

tuner = std::make_unique<::ktt::Tuner>(::ktt::ComputeApi::CUDA, initializer);
tuner = std::make_unique<::ktt::Tuner>(::ktt::ComputeApi::CUDA,
initializer);
}

std::string compiler_flags = "-std=c++17 ";
#ifdef KTT_LINE_INFO
compiler_flags += "-lineinfo ";
#endif
tuner->SetCompilerOptions(compiler_flags);

tuner->SetValidationMode(::ktt::ValidationMode::OfflineTuning);

std::atexit(cleanup);
Expand All @@ -77,70 +79,62 @@ inline ::ktt::Tuner& get_tuner()
}


template <typename Matrix,
template <template<typename, typename, typename> typename Matrix,
typename IndexType,
typename ValueType1,
typename ValueType2>
::ktt::KernelResult multiply(const Matrix& A,
const cusp::array1d<ValueType1, cusp::device_memory>& x,
cusp::array1d<ValueType2, cusp::device_memory>& y)
typename ValueType2,
typename ValueType3>
::ktt::KernelResult multiply(
const Matrix<IndexType, ValueType1, cusp::device_memory>& A,
const cusp::array1d<ValueType2, cusp::device_memory>& x,
cusp::array1d<ValueType3, cusp::device_memory>& y)
{
return cusp::system::cuda::ktt::multiply(get_tuner(), A, x, y);
}


template <typename Matrix,
template <template<typename, typename, typename> typename Matrix,
typename IndexType,
typename ValueType1,
typename ValueType2>
::ktt::KernelResult multiply(const Matrix& A,
const cusp::array1d<ValueType1, cusp::device_memory>& x,
cusp::array1d<ValueType2, cusp::device_memory>& y,
const ::ktt::KernelConfiguration& configuration,
bool run_with_profiling)
typename ValueType2,
typename ValueType3>
::ktt::KernelResult multiply(
const Matrix<IndexType, ValueType1, cusp::device_memory>& A,
const cusp::array1d<ValueType2, cusp::device_memory>& x,
cusp::array1d<ValueType3, cusp::device_memory>& y,
const ::ktt::KernelConfiguration& configuration,
bool run_with_profiling)
{
return cusp::system::cuda::ktt::multiply(get_tuner(), A, x, y, configuration, run_with_profiling);
}


template <typename IndexType,
template <template<typename, typename, typename> typename Matrix,
typename IndexType,
typename ValueType1,
typename ValueType2,
typename ValueType3>
std::vector<::ktt::KernelResult>
tune(const cusp::dia_matrix<IndexType, ValueType1, cusp::device_memory>& A,
tune(const Matrix<IndexType, ValueType1, cusp::device_memory>& A,
const cusp::array1d<ValueType2, cusp::device_memory>& x,
cusp::array1d<ValueType3, cusp::device_memory>& y,
std::optional<::ktt::ReferenceComputation> reference_computation)
{
return cusp::system::cuda::ktt::tune(get_tuner(), A, x, y, reference_computation);
}


template<typename IndexType,
typename ValueType1,
typename ValueType2,
typename ValueType3,
typename Format>
void reset_tuning()
{
using namespace cusp::system::cuda::ktt;

kernel_context kernel = get_kernel<IndexType, ValueType1, ValueType2, ValueType3>(get_tuner(), Format{});
detail::tuner->ClearData(kernel.kernel_id);
}


template <typename MatrixType,
typename ValueType1,
typename ValueType2>
typename ValueType2,
typename MemorySpace1,
typename MemorySpace2>
void reset_tuning(const MatrixType& A,
const cusp::array1d<ValueType1, cusp::device_memory>& x,
cusp::array1d<ValueType2, cusp::device_memory>& y)
const cusp::array1d<ValueType1, MemorySpace1>& x,
cusp::array1d<ValueType2, MemorySpace2>& y)
{
using IndexType = typename MatrixType::index_type;
using ValueType = typename MatrixType::value_type;
using Format = typename MatrixType::format;

return reset_tuning<IndexType, ValueType, ValueType2, ValueType2, Format>();
auto& tuner = get_tuner();
const auto& kernel = cusp::system::cuda::ktt::get_kernel(tuner, A, x, y);
tuner.ClearData(kernel.kernel_id);
}


Expand Down
Loading

0 comments on commit fe66995

Please sign in to comment.