WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy > Struct Template Reference

WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy > Struct Template Reference

Composable Kernel: ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy > Struct Template Reference
ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy > Struct Template Reference

#include <wp_pipeline_agmem_bgmem_creg_v2.hpp>

Inheritance diagram for ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >:
ck_tile::BaseWeightPreshufflePipelineAGmemBGmemCRegV2< Problem > ck_tile::WPQuantBPipelineAgBgCrV2< Problem, PipelinePolicy >

Public Types

using Base = BaseWeightPreshufflePipelineAGmemBGmemCRegV2<Problem>
using AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple>
using BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple>
using CDataType = remove_cvref_t<typename Problem::CDataType>
using AElementWise = remove_cvref_t<typename Problem::AElementWise>
using BElementWise = remove_cvref_t<typename Problem::BElementWise>
using BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>
using AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple>
using BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple>
using CLayout = remove_cvref_t<typename Problem::CLayout>
using ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>>
using BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>>
using ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>>
using BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>>
using BlockWeightPreshuffle
using WG = remove_cvref_t<decltype(config.template at<0>())>
using BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile>
using BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps>
using WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile>

Public Member Functions

template<TailNumber TailNum, typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename AElementFunction, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr, index_t UnaryOpSize_ = 8>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t< is_detected< is_tuple, ADramBlockWindowTmp >::value &&is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, TailNumber tail_number, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const

Static Public Member Functions

template<bool IsWave32Host = false>
static constexpr index_t GetVectorSizeA ()
template<bool IsWave32Host = false>
static constexpr index_t GetVectorSizeB ()
static constexpr index_t GetVectorSizeC ()
static CK_TILE_HOST const std::string GetName ()
static CK_TILE_HOST_DEVICE constexpr auto TransposeC ()
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize ()
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM (index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler ()
Static Public Member Functions inherited from ck_tile::BaseWeightPreshufflePipelineAGmemBGmemCRegV2< Problem >
static CK_TILE_HOST_DEVICE constexpr auto TransposeC ()
static CK_TILE_HOST_DEVICE constexpr bool BlockHasHotloop (index_t num_loop)
static CK_TILE_HOST_DEVICE constexpr TailNumber GetBlockLoopTailNum (index_t num_loop)
template<typename RunFunction>
static CK_TILE_HOST_DEVICE auto TailHandler (const RunFunction &run_func, bool, TailNumber tail_number)

Static Public Attributes

static constexpr auto config
static constexpr index_t DsWritePreIssue = 3
static constexpr index_t DsReadPreload = 2
static constexpr index_t BlockSize = Problem::kBlockSize
static constexpr index_t WaveSize = get_warp_size()
static constexpr index_t kMPerBlock = BlockGemmShape::kM
static constexpr index_t kNPerBlock = BlockGemmShape::kN
static constexpr index_t kKPerBlock = BlockGemmShape::kK
static constexpr index_t MPerBlock = BlockGemmShape::kM
static constexpr index_t NPerBlock = BlockGemmShape::kN
static constexpr index_t KPerBlock = BlockGemmShape::kK
static constexpr index_t flatKPerWarp = BlockGemmShape::flatKPerWarp
static constexpr index_t flatNPerWarp = BlockGemmShape::flatNPerWarp
static constexpr bool kPadM = Problem::kPadM
static constexpr bool kPadN = Problem::kPadN
static constexpr bool kPadK = Problem::kPadK
static constexpr index_t kLdsAlignmentInBytes = 16
static constexpr index_t NumWaveGroups = Problem::NumWaveGroups
static constexpr auto I0 = number<0>()
static constexpr auto I1 = number<1>()
static constexpr auto I2 = number<2>()
static constexpr auto idxM = I0
static constexpr auto idxN = I1
static constexpr auto idxK = I2
static constexpr index_t MWarp = config.template at<1>()
static constexpr index_t NWarp = config.template at<2>()
static constexpr index_t MIterPerWarp = kMPerBlock / (MWarp * WG::kM)
static constexpr index_t NIterPerWarp = kNPerBlock / (NWarp * WG::kN)
static constexpr index_t KIterPerWarp = kKPerBlock / WG::kK
static constexpr index_t KFlatPerBlockPerIter = flatKPerWarp
static constexpr index_t NFlatPerBlockPerIter = flatNPerWarp
static constexpr index_t MPerBlockPerIter = kMPerBlock / MIterPerWarp
static constexpr index_t KPerBlockPerIter = kKPerBlock / KIterPerWarp
static constexpr index_t K1 = Problem::VectorLoadSize / sizeof(ADataType)
static constexpr index_t m_preload
static constexpr auto TailNum = Problem::TailNum
static constexpr index_t mfma_per_wg = 1
static constexpr index_t dsread_per_wg
static constexpr index_t dsread_num_perK
static constexpr index_t dswrite_num_perK = dsread_num_perK / (MWarp * NWarp)
static constexpr index_t dswrite_rep = (dswrite_num_perK + MIterPerWarp - 1) / MIterPerWarp
static constexpr index_t Aload_num_perK = dswrite_num_perK
static constexpr index_t Aload_rep = dswrite_rep
static constexpr index_t Bload_num_perK = kNPerBlock * WG::kK / NWarp / K1 / WaveSize
static constexpr index_t HalfMIter = (MIterPerWarp + 1) / 2
static constexpr index_t Bload_rep = (Bload_num_perK + HalfMIter - 1) / HalfMIter
static constexpr index_t mfma_perM_perK = NIterPerWarp * mfma_per_wg
static constexpr index_t dswrite_mIter = (DsWritePreIssue - 1) % MIterPerWarp
static constexpr index_t dswrite_kIter = (DsWritePreIssue - 1) / MIterPerWarp
static constexpr bool DoubleSmemBuffer = Problem::DoubleSmemBuffer
static constexpr index_t Preshuffle = Problem::Preshuffle
static constexpr bool UsePersistentKernel
Static Public Attributes inherited from ck_tile::BaseWeightPreshufflePipelineAGmemBGmemCRegV2< Problem >
static constexpr index_t PrefetchStages = 2
static constexpr index_t PrefillStages = 1
static constexpr index_t GlobalBufferNum = 1
static constexpr bool UsePersistentKernel = Problem::Traits::UsePersistentKernel

Member Typedef Documentation

◆ ADataType

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>>

◆ AElementWise

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::AElementWise = remove_cvref_t<typename Problem::AElementWise>

◆ ALayout

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>>

◆ AsDataType

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple>

◆ AsLayout

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple>

◆ Base

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::Base = BaseWeightPreshufflePipelineAGmemBGmemCRegV2<Problem>

◆ BDataType

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>>

◆ BElementWise

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BElementWise = remove_cvref_t<typename Problem::BElementWise>

◆ BLayout

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>>

◆ BlockGemmShape

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>

◆ BlockTile

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile>

◆ BlockWarps

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps>

◆ BlockWeightPreshuffle

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BlockWeightPreshuffle
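Initial value: (expression not captured in this extract; only the cross-reference below survives)
Referenced definitions:
- remove_cv_t< std::remove_reference_t< T > > remove_cvref_t — type_traits.hpp:21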

◆ BsDataType

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple>

◆ BsLayout

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple>

◆ CDataType

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::CDataType = remove_cvref_t<typename Problem::CDataType>

◆ CLayout

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::CLayout = remove_cvref_t<typename Problem::CLayout>

◆ WarpTile

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile>

◆ WG

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
using ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::WG = remove_cvref_t<decltype(config.template at<0>())>

Member Function Documentation

◆ GetName()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
CK_TILE_HOST const std::string ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::GetName ( )
inline static nodiscard

◆ GetSmemSize()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::GetSmemSize ( )
inline static constexpr

◆ GetVectorSizeA()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
template<bool IsWave32Host = false>
constexpr index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::GetVectorSizeA ( )
inline static constexpr

◆ GetVectorSizeB()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
template<bool IsWave32Host = false>
constexpr index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::GetVectorSizeB ( )
inline static constexpr

◆ GetVectorSizeC()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
constexpr index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::GetVectorSizeC ( )
inline static constexpr

◆ HotLoopScheduler()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::HotLoopScheduler ( )
inline static constexpr

◆ Last2ndHotLoopScheduler()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::Last2ndHotLoopScheduler ( )
inline static constexpr

◆ LastHotLoopScheduler()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::LastHotLoopScheduler ( )
inline static constexpr

◆ operator()() [1/4]

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t< is_detected< is_tuple, ADramBlockWindowTmp >::value &&is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr>
CK_TILE_DEVICE auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const AElementFunction & a_element_func,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
const BElementFunction & b_element_func,
index_t num_loop,
void * p_smem_ping,
void * p_smem_pong ) const
inline

◆ operator()() [2/4]

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
template<TailNumber TailNum, typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename AElementFunction, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr, index_t UnaryOpSize_ = 8>
CK_TILE_DEVICE auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const AElementFunction & a_element_func,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
index_t num_loop,
void * p_smem_ping,
void * p_smem_pong ) const
inline

◆ operator()() [3/4]

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr>
CK_TILE_DEVICE auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
index_t num_loop,
TailNumber tail_number,
void *__restrict__ p_smem_0,
void *__restrict__ p_smem_1 ) const
inline

◆ operator()() [4/4]

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BFlatBlockWindowTmp >::value, bool > * = nullptr>
CK_TILE_DEVICE auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
index_t num_loop,
void * p_smem_ping,
void * p_smem_pong ) const
inline

◆ SchedulerPerM()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::SchedulerPerM ( index_t dsread_perM,
index_t dswrite_perM,
index_t load_perM )
inline static constexpr

◆ TransposeC()

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::TransposeC ( )
inline static constexpr

Member Data Documentation

◆ Aload_num_perK

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::Aload_num_perK = dswrite_num_perK
static constexpr

◆ Aload_rep

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::Aload_rep = dswrite_rep
static constexpr

◆ Bload_num_perK

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::Bload_num_perK = kNPerBlock * WG::kK / NWarp / K1 / WaveSize
static constexpr

◆ Bload_rep

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::Bload_rep = (Bload_num_perK + HalfMIter - 1) / HalfMIter
static constexpr

◆ BlockSize

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::BlockSize = Problem::kBlockSize
static constexpr

◆ config

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::config
static constexpr
Initial value:
= BlockWeightPreshuffle::BlockPolicy::template GetWarpGemmMWarpNWarp<Problem>()

◆ DoubleSmemBuffer

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
bool ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::DoubleSmemBuffer = Problem::DoubleSmemBuffer
static constexpr

◆ dsread_num_perK

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::dsread_num_perK
static constexpr
Initial value:
= WG::kM * WG::kK * sizeof(ADataType) * MIterPerWarp / WaveSize / Problem::VectorLoadSize
Referenced definitions:
- static constexpr index_t MIterPerWarp — wp_pipeline_agmem_bgmem_creg_v2.hpp:137
- remove_cvref_t< std::tuple_element_t< 0, AsDataType > > ADataType — wp_pipeline_agmem_bgmem_creg_v2.hpp:72
- static constexpr index_t WaveSize — wp_pipeline_agmem_bgmem_creg_v2.hpp:87

◆ dsread_per_wg

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::dsread_per_wg
static constexpr
Initial value:
= max(index_t(WG::kM * WG::kK * sizeof(ADataType) / WaveSize / Problem::VectorLoadSize), 1)
Referenced definitions:
- CK_TILE_HOST_DEVICE constexpr T max(T x) — tile/core/numeric/math.hpp:161
- int32_t index_t — integer.hpp:9
- remove_cvref_t< typename Problem::ADataType > ADataType — flatmm_pipeline_agmem_bgmem_creg_v1.hpp:48
- static constexpr index_t WaveSize — flatmm_pipeline_agmem_bgmem_creg_v1.hpp:69

◆ DsReadPreload

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::DsReadPreload = 2
static constexpr

◆ dswrite_kIter

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::dswrite_kIter = (DsWritePreIssue - 1) / MIterPerWarp
static constexpr

◆ dswrite_mIter

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::dswrite_mIter = (DsWritePreIssue - 1) % MIterPerWarp
static constexpr

◆ dswrite_num_perK

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::dswrite_num_perK = dsread_num_perK / (MWarp * NWarp)
static constexpr

◆ dswrite_rep

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::dswrite_rep = (dswrite_num_perK + MIterPerWarp - 1) / MIterPerWarp
static constexpr

◆ DsWritePreIssue

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::DsWritePreIssue = 3
static constexpr

◆ flatKPerWarp

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::flatKPerWarp = BlockGemmShape::flatKPerWarp
static constexpr

◆ flatNPerWarp

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::flatNPerWarp = BlockGemmShape::flatNPerWarp
static constexpr

◆ HalfMIter

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::HalfMIter = (MIterPerWarp + 1) / 2
static constexpr

◆ I0

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::I0 = number<0>()
static constexpr

◆ I1

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::I1 = number<1>()
static constexpr

◆ I2

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::I2 = number<2>()
static constexpr

◆ idxK

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::idxK = I2
static constexpr

◆ idxM

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::idxM = I0
static constexpr

◆ idxN

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::idxN = I1
static constexpr

◆ K1

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::K1 = Problem::VectorLoadSize / sizeof(ADataType)
static constexpr

◆ KFlatPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::KFlatPerBlockPerIter = flatKPerWarp
static constexpr

◆ KIterPerWarp

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::KIterPerWarp = kKPerBlock / WG::kK
static constexpr

◆ kKPerBlock

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::kKPerBlock = BlockGemmShape::kK
static constexpr

◆ kLdsAlignmentInBytes

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::kLdsAlignmentInBytes = 16
static constexpr

◆ kMPerBlock

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::kMPerBlock = BlockGemmShape::kM
static constexpr

◆ kNPerBlock

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::kNPerBlock = BlockGemmShape::kN
static constexpr

◆ kPadK

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
bool ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::kPadK = Problem::kPadK
static constexpr

◆ kPadM

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
bool ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::kPadM = Problem::kPadM
static constexpr

◆ kPadN

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
bool ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::kPadN = Problem::kPadN
static constexpr

◆ KPerBlock

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::KPerBlock = BlockGemmShape::kK
static constexpr

◆ KPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::KPerBlockPerIter = kKPerBlock / KIterPerWarp
static constexpr

◆ m_preload

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::m_preload
static constexpr
Initial value: (expression not captured in this extract; only the cross-references below survive)
Referenced definitions:
- static constexpr index_t MIterPerWarp — flatmm_pipeline_agmem_bgmem_creg_v1.hpp:103
- static constexpr index_t KIterPerWarp — flatmm_pipeline_agmem_bgmem_creg_v1.hpp:105
- static constexpr index_t DsReadPreload — flatmm_pipeline_agmem_bgmem_creg_v1.hpp:66

◆ mfma_per_wg

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::mfma_per_wg = 1
static constexpr

◆ mfma_perM_perK

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::mfma_perM_perK = NIterPerWarp * mfma_per_wg
static constexpr

◆ MIterPerWarp

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::MIterPerWarp = kMPerBlock / (MWarp * WG::kM)
static constexpr

◆ MPerBlock

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::MPerBlock = BlockGemmShape::kM
static constexpr

◆ MPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::MPerBlockPerIter = kMPerBlock / MIterPerWarp
static constexpr

◆ MWarp

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::MWarp = config.template at<1>()
static constexpr

◆ NFlatPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::NFlatPerBlockPerIter = flatNPerWarp
static constexpr

◆ NIterPerWarp

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::NIterPerWarp = kNPerBlock / (NWarp * WG::kN)
static constexpr

◆ NPerBlock

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::NPerBlock = BlockGemmShape::kN
static constexpr

◆ NumWaveGroups

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::NumWaveGroups = Problem::NumWaveGroups
static constexpr

◆ NWarp

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::NWarp = config.template at<2>()
static constexpr

◆ Preshuffle

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::Preshuffle = Problem::Preshuffle
static constexpr

◆ TailNum

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
auto ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::TailNum = Problem::TailNum
static constexpr

◆ UsePersistentKernel

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
bool ck_tile::BaseWeightPreshufflePipelineAGmemBGmemCRegV2< Problem >::UsePersistentKernel
static constexpr

◆ WaveSize

template<typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
index_t ck_tile::WeightPreshufflePipelineAGmemBGmemCRegV2< Problem, PipelinePolicy >::WaveSize = get_warp_size()
static constexpr

The documentation for this struct was generated from the following file: