amd_transpose_load.hpp Source File

amd_transpose_load.hpp Source File#

Composable Kernel: amd_transpose_load.hpp Source File
amd_transpose_load.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5#include "data_type.hpp"
6
7namespace ck {
8
9#if defined(__gfx12__)
10template <typename T>
11__device__ auto amd_global_load_transpose_to_vgpr(const T* in_ptr)
12{
13 using vector_t = typename vector_type<T, 8>::type;
14 if constexpr(sizeof(T) == 2)
15 {
16 typedef __attribute__((__vector_size__(8 * sizeof(__fp16)))) __fp16 llvm_fp16x8_t;
17 __attribute__((address_space(1))) llvm_fp16x8_t* glb_ptr =
18 reinterpret_cast<__attribute__((address_space(1))) llvm_fp16x8_t*>(
19 reinterpret_cast<uintptr_t>(in_ptr));
20 return bit_cast<vector_t>(__builtin_amdgcn_global_load_tr_b128_v8f16(glb_ptr));
21 }
22 else if constexpr(sizeof(T) == 1)
23 {
24 typedef __attribute__((__vector_size__(2 * sizeof(int)))) int llvm_intx2_t;
25 __attribute__((address_space(1))) llvm_intx2_t* glb_ptr =
26 reinterpret_cast<__attribute__((address_space(1))) llvm_intx2_t*>(
27 reinterpret_cast<uintptr_t>(in_ptr));
28 return bit_cast<vector_t>(__builtin_amdgcn_global_load_tr_b64_v2i32(glb_ptr));
29 }
30 else
31 {
32 static_assert(false, "not implemented");
33 }
34}
35#endif
36
37} // namespace ck
Definition ck.hpp:268
__host__ __device__ constexpr Y bit_cast(const X &x)
Definition type.hpp:306
_W64 unsigned int uintptr_t
Definition stdint.h:164
Definition dtype_vector.hpp:10