ck::mfma_type< MfmaInstr::mfma_f32_16x16x8xf32 > Struct Reference
ck::mfma_type< MfmaInstr::mfma_f32_16x16x8xf32 > Struct Reference
#include <xdlops_gemm.hpp>
Public Member Functions | |
| template<index_t MPerXdlops, index_t NPerXdlops, class FloatA, class FloatB, class FloatC> | |
| __device__ void | run (const FloatA &a, const FloatB &b, FloatC &reg_c) const |
Static Public Attributes | |
| static constexpr index_t | wave_size = 64 |
| static constexpr index_t | m_per_blk = 16 |
| static constexpr index_t | n_per_blk = 16 |
| static constexpr index_t | num_threads_per_blk = n_per_blk |
| static constexpr index_t | num_regs_per_blk = m_per_blk * n_per_blk / wave_size |
| static constexpr index_t | num_input_blks = m_per_blk / num_regs_per_blk |
| static constexpr index_t | group_size = 4 |
| static constexpr index_t | num_groups_per_blk = 1 |
| static constexpr index_t | num_output_blks = 1 |
| static constexpr index_t | k_per_blk = 2 |
| static constexpr bool | is_k_reduction = true |
Detailed Description
num_threads_per_blk == n_per_blk; num_regs_per_blk * num_input_blks == m_per_blk; num_regs_per_blk * wave_size == m_per_blk * n_per_blk;
group_size * num_groups_per_blk == num_regs_per_blk
num_regs_per_blk is the output (CD) register size, which is determined by the instruction. k_per_blk (K1PerXdlops) is the input (AB) register size, which is determined by the instruction. group_size corresponds to the CD rows mapping; see GetBeginOfThreadBlk().
is_k_reduction = (k_per_blk == KPerXdlops) ? false : true.
if (is_k_reduction){ num_output_blks == 1; } else { num_input_blks == num_output_blks; }
Member Function Documentation
◆ run()
|
inline |
Member Data Documentation
◆ group_size
|
static constexpr |
◆ is_k_reduction
|
static constexpr |
◆ k_per_blk
|
static constexpr |
◆ m_per_blk
|
static constexpr |
◆ n_per_blk
|
static constexpr |
◆ num_groups_per_blk
|
static constexpr |
◆ num_input_blks
|
static constexpr |
◆ num_output_blks
|
static constexpr |
◆ num_regs_per_blk
|
static constexpr |
◆ num_threads_per_blk
|
static constexpr |
◆ wave_size
|
static constexpr |
The documentation for this struct was generated from the following file: