Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix instances of 'scan' copy-pasted into reduction documentation #221

Merged
merged 1 commit into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cub/cub/block/block_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ public:
ReductionOp reduction_op, ///< [in] Binary reduction functor
int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS)
{
// Determine if we scan skip bounds checking
// Determine if we skip bounds checking
if (num_valid >= BLOCK_THREADS)
{
return InternalBlockReduce(temp_storage).template Reduce<true>(input, num_valid, reduction_op);
Expand Down Expand Up @@ -585,7 +585,7 @@ public:
T input, ///< [in] Calling thread's input
int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS)
{
// Determine if we scan skip bounds checking
// Determine if we skip bounds checking
if (num_valid >= BLOCK_THREADS)
{
return InternalBlockReduce(temp_storage).template Sum<true>(input, num_valid);
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/specializations/block_reduce_raking.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct BlockReduceRaking

template <bool IS_FULL_TILE, typename ReductionOp, int ITERATION>
__device__ __forceinline__ T RakingReduction(
ReductionOp reduction_op, ///< [in] Binary scan operator
ReductionOp reduction_op, ///< [in] Binary reduction operator
T *raking_segment,
T partial, ///< [in] <b>[<em>lane</em><sub>0</sub> only]</b> Warp-wide aggregate reduction of input items
int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS)
Expand All @@ -142,7 +142,7 @@ struct BlockReduceRaking

template <bool IS_FULL_TILE, typename ReductionOp>
__device__ __forceinline__ T RakingReduction(
ReductionOp /*reduction_op*/, ///< [in] Binary scan operator
ReductionOp /*reduction_op*/, ///< [in] Binary reduction operator
T * /*raking_segment*/,
T partial, ///< [in] <b>[<em>lane</em><sub>0</sub> only]</b> Warp-wide aggregate reduction of input items
int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ struct BlockReduceRakingCommutativeOnly
typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid
} default_storage;

typename FallBack::TempStorage fallback_storage; ///< Fall-back storage for non-commutative block scan
typename FallBack::TempStorage fallback_storage; ///< Fall-back storage for non-commutative block reduction
};


Expand Down Expand Up @@ -144,7 +144,7 @@ struct BlockReduceRakingCommutativeOnly
T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.default_storage.raking_grid, linear_tid);
partial = internal::ThreadReduce<SEGMENT_LENGTH>(raking_segment, cub::Sum(), partial);

// Warpscan
// Warp reduction
partial = WarpReduce(temp_storage.default_storage.warp_storage).Sum(partial);
}
}
Expand Down Expand Up @@ -181,7 +181,7 @@ struct BlockReduceRakingCommutativeOnly
T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.default_storage.raking_grid, linear_tid);
partial = internal::ThreadReduce<SEGMENT_LENGTH>(raking_segment, reduction_op, partial);

// Warpscan
// Warp reduction
partial = WarpReduce(temp_storage.default_storage.warp_storage).Reduce(partial, reduction_op);
}
}
Expand Down
10 changes: 5 additions & 5 deletions cub/cub/block/specializations/block_reduce_warp_reductions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ struct BlockReduceWarpReductions
/// Shared memory storage layout type
struct _TempStorage
{
typename WarpReduce::TempStorage warp_reduce[WARPS]; ///< Buffer for warp-synchronous scan
T warp_aggregates[WARPS]; ///< Shared totals from each warp-synchronous scan
typename WarpReduce::TempStorage warp_reduce[WARPS]; ///< Buffer for warp-synchronous reduction
T warp_aggregates[WARPS]; ///< Shared totals from each warp-synchronous reduction
T block_prefix; ///< Shared prefix for the entire thread block
};

Expand Down Expand Up @@ -108,7 +108,7 @@ struct BlockReduceWarpReductions

template <bool FULL_TILE, typename ReductionOp, int SUCCESSOR_WARP>
__device__ __forceinline__ T ApplyWarpAggregates(
ReductionOp reduction_op, ///< [in] Binary scan operator
ReductionOp reduction_op, ///< [in] Binary reduction operator
T warp_aggregate, ///< [in] <b>[<em>lane</em><sub>0</sub> only]</b> Warp-wide aggregate reduction of input items
int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS)
Int2Type<SUCCESSOR_WARP> /*successor_warp*/)
Expand All @@ -123,7 +123,7 @@ struct BlockReduceWarpReductions

template <bool FULL_TILE, typename ReductionOp>
__device__ __forceinline__ T ApplyWarpAggregates(
ReductionOp /*reduction_op*/, ///< [in] Binary scan operator
ReductionOp /*reduction_op*/, ///< [in] Binary reduction operator
T warp_aggregate, ///< [in] <b>[<em>lane</em><sub>0</sub> only]</b> Warp-wide aggregate reduction of input items
int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS)
Int2Type<WARPS> /*successor_warp*/)
Expand All @@ -137,7 +137,7 @@ struct BlockReduceWarpReductions
bool FULL_TILE,
typename ReductionOp>
__device__ __forceinline__ T ApplyWarpAggregates(
ReductionOp reduction_op, ///< [in] Binary scan operator
ReductionOp reduction_op, ///< [in] Binary reduction operator
T warp_aggregate, ///< [in] <b>[<em>lane</em><sub>0</sub> only]</b> Warp-wide aggregate reduction of input items
int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS)
{
Expand Down
24 changes: 12 additions & 12 deletions cub/cub/thread/thread_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ __device__ __forceinline__ AccumT ThreadReduce(
/**
* \brief Perform a sequential reduction over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned.
*
* \tparam LENGTH Length of input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ScanOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
* \tparam LENGTH Length of input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ReductionOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
*/
template <
int LENGTH,
Expand All @@ -92,9 +92,9 @@ __device__ __forceinline__ AccumT ThreadReduce(
/**
* \brief Perform a sequential reduction over \p LENGTH elements of the \p input array. The aggregate is returned.
*
* \tparam LENGTH Length of input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ScanOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
* \tparam LENGTH Length of input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ReductionOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
*/
template <
int LENGTH,
Expand All @@ -112,9 +112,9 @@ __device__ __forceinline__ T ThreadReduce(
/**
* \brief Perform a sequential reduction over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned.
*
* \tparam LENGTH <b>[inferred]</b> Length of \p input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ScanOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
* \tparam LENGTH <b>[inferred]</b> Length of \p input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ReductionOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
*/
template <
int LENGTH,
Expand All @@ -134,9 +134,9 @@ __device__ __forceinline__ AccumT ThreadReduce(
/**
* \brief Serial reduction with the specified operator
*
* \tparam LENGTH <b>[inferred]</b> Length of \p input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ScanOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
* \tparam LENGTH <b>[inferred]</b> Length of \p input array
* \tparam T <b>[inferred]</b> The data type to be reduced.
* \tparam ReductionOp <b>[inferred]</b> Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
*/
template <
int LENGTH,
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/warp/specializations/warp_reduce_shfl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ struct WarpReduceShfl


//---------------------------------------------------------------------
// Templated inclusive scan iteration
// Templated reduction iteration
//---------------------------------------------------------------------

template <typename ReductionOp, int STEP>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/warp/specializations/warp_reduce_smem.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct WarpReduceSmem
/// Whether the logical warp size is a power-of-two
IS_POW_OF_TWO = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE,

/// The number of warp scan steps
/// The number of warp reduction steps
STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE,

/// The number of threads in half a warp
Expand Down
Loading