device_gemm_multiple_d_layernorm_wmma_cshuffle_v3.hpp File Reference
device_gemm_multiple_d_layernorm_wmma_cshuffle_v3.hpp File Reference
#include <iostream>
#include <sstream>
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3.hpp"
#include "ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"

Go to the source code of this file.
Namespaces | |
| namespace | ck |
| namespace | ck::tensor_operation |
| namespace | ck::tensor_operation::device |
Functions | |
| template<typename GridwiseGemm, typename EMeanVarDataType, bool HasMainKBlockLoop, InMemoryDataOperationEnum EGlobalMemoryDataOperation, index_t MinimumOccupancy = 1, TailNumber TailNum = TailNumber::Full> | |
| __global__ void | ck::kernel_gemm_multiple_d_welford_first_half_wmma_cshuffle_v3 (typename GridwiseGemm::Argument karg, EMeanVarDataType *__restrict__ p_welford_mean_grid, EMeanVarDataType *__restrict__ p_welford_var_grid, int32_t *__restrict__ p_welford_count_grid) |
| template<typename GridwiseWelfordLayernorm, typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename LayernormMeanVarGridDesc_M_NBlock, typename LayernormCountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation> | |
| __global__ void | ck::kernel_welford_layernorm2d_second_half (const EMeanVarDataType *__restrict__ p_e_grid, const EMeanVarDataType *__restrict__ p_in_welford_mean_grid, const EMeanVarDataType *__restrict__ p_in_welford_var_grid, const int32_t *__restrict__ p_in_welford_count_grid, const GammaDataType *__restrict__ p_gamma_grid, const BetaDataType *__restrict__ p_beta_grid, HDataType *__restrict__ p_h_grid, const EHGridDesc_M_N e_grid_desc_m_n, const EHGridDesc_M_N h_grid_desc_m_n, const LayernormMeanVarGridDesc_M_NBlock mean_var_grid_desc_m_nblock, const LayernormCountGridDesc_M_NBlock count_grid_desc_m_nblock, const GammaBetaGridDesc_N gamma_grid_desc_n, const GammaBetaGridDesc_N beta_grid_desc_n, index_t numMeanVarCountBlockTileIteration_N, index_t NBlockClusterLength, ComputeDataType epsilon, HElementwiseOperation h_element_op) |