device_gemm_xdl_skip_b_lds.hpp Source File
device_gemm_xdl_skip_b_lds.hpp
Go to the documentation of this file.
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
Definition convolution_backward_data_specialization.hpp:8
GemmSpecialization
Definition gemm_specialization.hpp:11
@ MNPadding
Definition gemm_specialization.hpp:17
Definition convolution_backward_data_specialization.hpp:7
Definition ck.hpp:268
__host__ __device__ constexpr auto make_pass_through_transform(const LowLength &low_length)
Definition multi_index_transform_helper.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
__host__ __device__ constexpr auto make_right_pad_transform(const LowLength &low_length, const RightPadLength &right_pad, integral_constant< bool, SkipIsValidCheck >=integral_constant< bool, false >{})
Definition multi_index_transform_helper.hpp:37
__global__ void kernel_gemm_xdlops_skip_b_lds_v1(const FloatAB *__restrict__ p_a_grid, const FloatAB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, const AGridDesc_K0_M_K1 a_grid_desc_k0_m_k1, const BGridDesc_K0_N_K1 b_grid_desc_k0_n_k1, const CGridDesc_M_N c_grid_desc_m_n, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CElementwiseOperation c_element_op, const Block2CTileMap block_2_ctile_map)
Definition gridwise_gemm_xdlops_skip_b_lds_v1.hpp:34
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
__host__ __device__ constexpr auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation >=integral_constant< bool, false >{})
Definition multi_index_transform_helper.hpp:90
Definition ck/stream_config.hpp:10
Definition gridwise_gemm_xdlops_skip_b_lds_v1.hpp:117
__host__ static __device__ constexpr bool CheckValidity(const AGridDesc_K0_M_K1 &a_grid_desc_k0_m_k1, const BGridDesc_K0_N_K1 &b_grid_desc_k0_n_k1, const CGridDesc_M_N &c_grid_desc_m_n, index_t M01, index_t N01)
Definition gridwise_gemm_xdlops_skip_b_lds_v1.hpp:192
Definition utility/sequence.hpp:43
Definition device_base.hpp:197
BaseArgument()=default
BaseInvoker()=default
Definition device_gemm.hpp:22
Definition device_gemm_xdl_skip_b_lds.hpp:235
CDataType * p_c_grid_
Definition device_gemm_xdl_skip_b_lds.hpp:272
index_t N01_
Definition device_gemm_xdl_skip_b_lds.hpp:277
const BDataType * p_b_grid_
Definition device_gemm_xdl_skip_b_lds.hpp:271
AGridDesc_K0_M_K1 a_grid_desc_k0_m_k1_
Definition device_gemm_xdl_skip_b_lds.hpp:273
CElementwiseOperation c_element_op_
Definition device_gemm_xdl_skip_b_lds.hpp:280
CGridDesc_M_N c_grid_desc_m_n_
Definition device_gemm_xdl_skip_b_lds.hpp:275
BElementwiseOperation b_element_op_
Definition device_gemm_xdl_skip_b_lds.hpp:279
const ADataType * p_a_grid_
Definition device_gemm_xdl_skip_b_lds.hpp:270
AElementwiseOperation a_element_op_
Definition device_gemm_xdl_skip_b_lds.hpp:278
BGridDesc_K0_N_K1 b_grid_desc_k0_n_k1_
Definition device_gemm_xdl_skip_b_lds.hpp:274
index_t M01_
Definition device_gemm_xdl_skip_b_lds.hpp:276
Argument(const ADataType *p_a_grid, const BDataType *p_b_grid, CDataType *p_c_grid, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t M01, index_t N01, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op)
Definition device_gemm_xdl_skip_b_lds.hpp:236
Definition device_gemm_xdl_skip_b_lds.hpp:285
INVOKER_RUN_IMPL float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_gemm_xdl_skip_b_lds.hpp:393
float RunImp(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_gemm_xdl_skip_b_lds.hpp:289
DeviceGemmXdlSkipBLds::Argument Argument
Definition device_gemm_xdl_skip_b_lds.hpp:286
Definition device_gemm_xdl_skip_b_lds.hpp:65
static constexpr auto I2
Definition device_gemm_xdl_skip_b_lds.hpp:72
decltype(MakeCGridDescriptor_M_N(1, 1, 1)) CGridDesc_M_N
Definition device_gemm_xdl_skip_b_lds.hpp:192
static auto MakeInvoker()
Definition device_gemm_xdl_skip_b_lds.hpp:472
static constexpr auto I1
Definition device_gemm_xdl_skip_b_lds.hpp:71
static constexpr auto NXdlPerWave32
Definition device_gemm_xdl_skip_b_lds.hpp:68
GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, ADataType, AccDataType, CDataType, InMemoryDataOperationEnum::Set, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, K1, MXdlPerWave, NXdlPerWave_, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, false, BBlockBufferSize, Sequence< 0, 2, 4, 5, 6, 1, 3, 7 >, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector > GridwiseGemmBase
Definition device_gemm_xdl_skip_b_lds.hpp:196
decltype(MakeBGridDescriptor_K0_N_K1(1, 1, 1)) BGridDesc_K0_N_K1
Definition device_gemm_xdl_skip_b_lds.hpp:191
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_gemm_xdl_skip_b_lds.hpp:438
std::string GetTypeString() const override
Definition device_gemm_xdl_skip_b_lds.hpp:511
static constexpr auto I0
Definition device_gemm_xdl_skip_b_lds.hpp:70
static auto MakeBGridDescriptor_K0_N_K1(index_t K, index_t N, index_t StrideB)
Definition device_gemm_xdl_skip_b_lds.hpp:116
static bool IsSupportedArgument(const Argument &arg)
Definition device_gemm_xdl_skip_b_lds.hpp:406
decltype(MakeAGridDescriptor_K0_M_K1(1, 1, 1)) AGridDesc_K0_M_K1
Definition device_gemm_xdl_skip_b_lds.hpp:190
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_gemm_xdl_skip_b_lds.hpp:67
static auto MakeArgument(const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op)
Definition device_gemm_xdl_skip_b_lds.hpp:443
static auto MakeCGridDescriptor_M_N(index_t M, index_t N, index_t StrideC)
Definition device_gemm_xdl_skip_b_lds.hpp:155
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_gemm_xdl_skip_b_lds.hpp:230
static auto MakeAGridDescriptor_K0_M_K1(index_t M, index_t K, index_t StrideA)
Definition device_gemm_xdl_skip_b_lds.hpp:77
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override
Definition device_gemm_xdl_skip_b_lds.hpp:475
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_gemm_xdl_skip_b_lds.hpp:505
GridwiseGemmBase< NXdlPerWave32 > GridwiseGemm32
Definition device_gemm_xdl_skip_b_lds.hpp:231
static constexpr auto K1Number
Definition device_gemm_xdl_skip_b_lds.hpp:74
static constexpr bool IsValidCompilationParameter()
Definition device_gemm_xdl_skip_b_lds.hpp:400