DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference

DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference
ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference

#include <device_reduce_threadwise.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >:
ck::tensor_operation::device::DeviceReduce< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex > ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
struct  Invoker

Public Types

using IndexDataType = int32_t

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::array< index_t, Rank > inLengths, const std::array< index_t, Rank > inStrides, const std::array< index_t, NumDstDim > outLengths, const std::array< index_t, NumDstDim > outStrides, const std::array< int, NumReduceDim > reduceDims, double alpha, double beta, const void *in_dev, const void *in_index_dev, void *out_dev, void *out_index_dev, const InElementwiseOperation in_elementwise_op, const AccElementwiseOperation acc_elementwise_op) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::DeviceReduce< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex >
virtual std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::array< index_t, Rank > inLengths, const std::array< index_t, Rank > inStrides, const std::array< index_t, NumOutDim > outLengths, const std::array< index_t, NumOutDim > outStrides, const std::array< int, NumReduceDim > reduceDims, double alpha, double beta, const void *in_dev, const void *in_index_dev, void *out_dev, void *out_index_dev, const InElementwiseOperation in_elementwise_op, const AccElementwiseOperation acc_elementwise_op)=0
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Static Public Member Functions

static auto MakeSrc2dDescriptor (const std::array< index_t, Rank > &inLengths, const std::array< index_t, Rank > &inStrides)
static auto MakeDst1dDescriptor (const std::array< index_t, NumDstDim > &outLengths, const std::array< index_t, NumDstDim > &outStrides)

Static Public Attributes

static constexpr bool HaveIndexInput = OutputIndex && HaveIndexInputIfOutputIndex
static constexpr index_t NumInvariantDim = Rank - NumReduceDim
static constexpr index_t NumSrcDim = Rank
static constexpr index_t NumDstDim = (NumInvariantDim == 0) ? 1 : NumInvariantDim
static constexpr bool reduceAllDim = (NumInvariantDim == 0)
static constexpr index_t M_BlockTileSize = BlockSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = 1 * KThreadSliceSize
Static Public Attributes inherited from ck::tensor_operation::device::DeviceReduce< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex >
static constexpr index_t NumOutDim = (Rank - NumReduceDim == 0) ? 1 : Rank - NumReduceDim

Member Typedef Documentation

◆ IndexDataType

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
using ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::IndexDataType = int32_t

Member Function Documentation

◆ GetTypeString()

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
std::string ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::GetTypeString ( ) const
inline override virtual

◆ IsSupportedArgument()

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
bool ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::IsSupportedArgument ( const BaseArgument * p_arg)
inline override virtual

◆ MakeArgumentPointer()

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeArgumentPointer ( const std::array< index_t, Rank > inLengths,
const std::array< index_t, Rank > inStrides,
const std::array< index_t, NumDstDim > outLengths,
const std::array< index_t, NumDstDim > outStrides,
const std::array< int, NumReduceDim > reduceDims,
double alpha,
double beta,
const void * in_dev,
const void * in_index_dev,
void * out_dev,
void * out_index_dev,
const InElementwiseOperation in_elementwise_op,
const AccElementwiseOperation acc_elementwise_op )
inline override

◆ MakeDst1dDescriptor()

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
auto ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeDst1dDescriptor ( const std::array< index_t, NumDstDim > & outLengths,
const std::array< index_t, NumDstDim > & outStrides )
inline static

◆ MakeInvokerPointer()

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeInvokerPointer ( )
inline override virtual

◆ MakeSrc2dDescriptor()

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
auto ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeSrc2dDescriptor ( const std::array< index_t, Rank > & inLengths,
const std::array< index_t, Rank > & inStrides )
inline static

Member Data Documentation

◆ HaveIndexInput

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
bool ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::HaveIndexInput = OutputIndex && HaveIndexInputIfOutputIndex
static constexpr

◆ K_BlockTileSize

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::K_BlockTileSize = 1 * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::M_BlockTileSize = BlockSize * MThreadSliceSize
static constexpr

◆ NumDstDim

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::NumDstDim = (NumInvariantDim == 0) ? 1 : NumInvariantDim
static constexpr

◆ NumInvariantDim

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::NumInvariantDim = Rank - NumReduceDim
static constexpr

◆ NumSrcDim

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::NumSrcDim = Rank
static constexpr

◆ reduceAllDim

template<typename InDataType, typename AccDataType, typename OutDataType, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperation, typename AccElementwiseOperation, bool PropagateNan, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInputIfOutputIndex, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
bool ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::reduceAllDim = (NumInvariantDim == 0)
static constexpr

The documentation for this struct was generated from the following file: