batched_transpose_problem.hpp Source File

batched_transpose_problem.hpp Source File

Composable Kernel: batched_transpose_problem.hpp Source File
batched_transpose_problem.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include "ck_tile/core.hpp"
7#include <type_traits>
8
9namespace ck_tile {
10
// Compile-time constants for the batched-transpose problem this header is named after:
// warp/block extents, padding flags and load/store vector widths, all derived from the
// template arguments.
//
// Template parameters:
//   DataType_   - element type; its size (via the DataType alias) sets the vector widths.
//   BlockTile   - type indexable with at(number<I>{}); entries 0 and 1 are read as
//                 kMPerBlock and kNPerBlock.
//   WarpLayout  - type indexable with at(number<I>{}); entries 0 and 1 are read as
//                 kMPerWarp and kNPerWarp.
//   kPadM_      - re-exported as kPadM; when true, VectorSizeOutput is forced to 1.
//   kPadN_      - re-exported as kPadN; when true, VectorSizeInput is forced to 1.
11template <typename DataType_,
12 typename BlockTile, // Sequence<...
13 typename WarpLayout,
14 bool kPadM_ = false,
15 bool kPadN_ = false> // Sequence<...
// NOTE(review): listing line 16 is absent from this extract; presumably it carries the
// struct declaration that this template header and the brace below belong to. Restore it
// from the original header.
17{
// NOTE(review): listing line 18 is absent from this extract. The cross-reference index at
// the end of this page lists `DataType` there, as `remove_cvref_t< DataType_ >`.
19
// Extents along M (index 0) and N (index 1) taken from WarpLayout.
20 static constexpr index_t kMPerWarp = WarpLayout::at(number<0>{});
21 static constexpr index_t kNPerWarp = WarpLayout::at(number<1>{});
22
// Extents along M (index 0) and N (index 1) taken from BlockTile.
23 static constexpr index_t kMPerBlock = BlockTile::at(number<0>{});
24 static constexpr index_t kNPerBlock = BlockTile::at(number<1>{});
25
// NOTE(review): listing line 26 is absent from this extract. The cross-reference index
// lists `static constexpr index_t kBlockSize` there; its initializer is not shown.
27
// Padding flags re-exported unchanged from the template arguments.
28 static constexpr bool kPadM = kPadM_;
29 static constexpr bool kPadN = kPadN_;
30
31 // 128-bit is the max single-instruction bandwidth for load/store
// MaxLoadStoreSize is that 128-bit limit expressed in bytes (16 * 8 = 128).
32 static constexpr index_t MaxLoadStoreSize = 16;
// Elements per 16-byte access, or 1 when the matching padding flag is set.
// The input width keys off kPadN and the output width off kPadM; presumably N is the
// contiguous dimension of the input and M that of the transposed output - TODO confirm.
// Integer division: the result is 0 for an element type wider than 16 bytes.
33 static constexpr index_t VectorSizeInput = kPadN ? 1 : MaxLoadStoreSize / sizeof(DataType);
34 static constexpr index_t VectorSizeOutput = kPadM ? 1 : MaxLoadStoreSize / sizeof(DataType);
35};
36} // namespace ck_tile
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_HOST_DEVICE constexpr index_t get_warp_size()
Definition arch.hpp:63
constant< v > number
Definition tile/core/numeric/integral_constant.hpp:37
int32_t index_t
Definition integer.hpp:9
Definition batched_transpose_problem.hpp:17
static constexpr bool kPadN
Definition batched_transpose_problem.hpp:29
static constexpr index_t kMPerWarp
Definition batched_transpose_problem.hpp:20
static constexpr index_t VectorSizeInput
Definition batched_transpose_problem.hpp:33
static constexpr index_t kNPerBlock
Definition batched_transpose_problem.hpp:24
remove_cvref_t< DataType_ > DataType
Definition batched_transpose_problem.hpp:18
static constexpr bool kPadM
Definition batched_transpose_problem.hpp:28
static constexpr index_t VectorSizeOutput
Definition batched_transpose_problem.hpp:34
static constexpr index_t kNPerWarp
Definition batched_transpose_problem.hpp:21
static constexpr index_t MaxLoadStoreSize
Definition batched_transpose_problem.hpp:32
static constexpr index_t kMPerBlock
Definition batched_transpose_problem.hpp:23
static constexpr index_t kBlockSize
Definition batched_transpose_problem.hpp:26