Invoker Struct Reference
ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched >::Invoker Struct Reference
#include <device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched >::Invoker:
Public Member Functions | |
| template<typename Argument_> | |
| void | Print (const Argument_ &karg) |
| template<typename GridwiseGemm> | |
| float | RunImp (const Argument &karg, const StreamConfig &stream_config=StreamConfig{}) |
| INVOKER_RUN_IMPL float | Run (const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override |
| Public Member Functions inherited from ck::tensor_operation::device::BaseInvoker | |
| BaseInvoker ()=default | |
| BaseInvoker (const BaseInvoker &)=default | |
| BaseInvoker & | operator= (const BaseInvoker &)=default |
| virtual | ~BaseInvoker () |
Member Function Documentation
◆ Print()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
template<typename Argument_>
| void Print (const Argument_ &karg) |
inline |
◆ Run()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
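| INVOKER_RUN_IMPL float Run (const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) |
inline override virtual |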
Reimplemented from ck::tensor_operation::device::BaseInvoker.
◆ RunImp()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
template<typename GridwiseGemm>
| float RunImp (const Argument &karg, const StreamConfig &stream_config=StreamConfig{}) |
inline |
The documentation for this struct was generated from the following file: