#include <blockwise_gemm_xdlops_skip_b_lds.hpp>
◆ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __host__ __device__ ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1 |
( |
| ) |
|
|
inline |
◆ CalculateAThreadOriginDataIndex()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::CalculateAThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ CalculateBThreadOriginDataIndex()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::CalculateBThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ CalculateCThreadOriginDataIndex()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::CalculateCThreadOriginDataIndex |
( |
Number< m0 > | , |
|
|
Number< n0 > | , |
|
|
Number< xdlops_i > | , |
|
|
Number< blk_i > | ) |
|
inline static |
◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
| ) |
|
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
| ) |
|
|
inline static constexpr |
◆ GetCThreadBuffer()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __host__ __device__ constexpr auto & ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadBuffer |
( |
| ) |
|
|
inline constexpr |
◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
| ) |
|
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
| ) |
|
|
inline static constexpr |
◆ GetWaveIdx()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetWaveIdx |
( |
| ) |
|
|
inline static |
◆ MakeABlockDescriptor_M0_M1_M2_K()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeABlockDescriptor_M0_M1_M2_K |
( |
| ) |
|
|
inline static constexpr |
◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
template<typename CGridDesc_G_M_N>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
const CGridDesc_G_M_N & | c_grid_desc_g_m_n | ) |
|
|
inline static constexpr |
◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
template<typename CGridDesc_M_N>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
const CGridDesc_M_N & | c_grid_desc_m_n | ) |
|
|
inline static constexpr |
◆ MoveABlockSliceWindow()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MoveABlockSliceWindow |
( |
| ) |
|
|
inline |
◆ ResetABlockStartWindow()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| __device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ResetABlockStartWindow |
( |
| ) |
|
|
inline |
◆ Run()
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
| __device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Run |
( |
const ABlockBuffer & | a_block_buf, |
|
|
const BBlockBuffer & | b_thread_buf, |
|
|
CThreadBuffer & | c_thread_buf ) const |
|
inline |
◆ a_block_desc_m0_m1_m2_k
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K() |
|
static constexpr |
◆ A_K0
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::A_K0 = AK0MK1BlockDesc{}.GetLength(I0) |
|
static constexpr |
◆ A_K1
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::A_K1 = AK0MK1BlockDesc{}.GetLength(I2) |
|
static constexpr |
◆ c_thread_buf_
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| StaticBufferTupleOfVector<AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true> ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::c_thread_buf_ |
◆ I0
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I0 = Number<0>{} |
|
static constexpr |
◆ I1
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I1 = Number<1>{} |
|
static constexpr |
◆ I2
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I2 = Number<2>{} |
|
static constexpr |
◆ I3
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I3 = Number<3>{} |
|
static constexpr |
◆ K0PerThread
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::K0PerThread = K0PerBlock / xdlops_gemm.K0PerXdlops |
|
static constexpr |
◆ KPerBlock
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KPerBlock = K0PerBlock * KPack |
|
static constexpr |
◆ KPerThread
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops |
|
static constexpr |
◆ MWaves
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MWaves = MPerBlock / (MRepeat * MPerXDL) |
|
static constexpr |
◆ NWaves
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::NWaves = NPerBlock / (NRepeat * NPerXDL) |
|
static constexpr |
◆ WaveSize
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::WaveSize = BlockSize / MWaves / NWaves |
|
static constexpr |
◆ xdlops_gemm
template<
index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t K0PerBlock,
index_t MPerXDL,
index_t NPerXDL,
index_t MRepeat,
index_t NRepeat,
index_t KPack>
| auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::xdlops_gemm = XdlopsGemm<FloatAB, MPerXDL, NPerXDL, KPack>{} |
|
static constexpr |
The documentation for this struct was generated from the following file: