-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[clang][amdgpu] Add builtin for struct buffer lds load #148950
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang][amdgpu] Add builtin for struct buffer lds load #148950
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-ir Author: None (zGoldthorpe) ChangesThis is essentially just a revision of #137678 which only exposes a builtin for the intrinsic The reason for excluding the other intrinsics exposed by the cited PR is because the intrinsics taking a Full diff: https://github.com/llvm/llvm-project/pull/148950.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 4d371a9f7d6db..2a4e8817d6d38 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -164,6 +164,7 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
+TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
//===----------------------------------------------------------------------===//
// Ballot builtins.
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index e6414a623b929..c23c98aa3aaeb 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -36,6 +36,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
+ case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
case AMDGPU::BI__builtin_amdgcn_load_to_lds:
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
constexpr const int SizeIdx = 2;
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
index 8256b61525f9d..197c1096640c0 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
@@ -10,3 +10,7 @@
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
}
+
+void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) {
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
index 5915393ae7f56..8fbffbeea0531 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
@@ -8,3 +8,10 @@ void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local vo
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
}
+
+void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset, int x) {
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, x, vindex, voffset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 3, vindex, voffset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl
index 74944f2d93c72..cb832b9aa4845 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl
@@ -5,6 +5,7 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s
// REQUIRES: amdgpu-registered-target
-void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
+void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int vindex, int offset, int soffset) {
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8016757cf0f3c..6a100eaeefc59 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1932,7 +1932,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
-class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
+class AMDGPUStructPtrBufferLoadLDS :
+ ClangBuiltin<"__builtin_amdgcn_struct_ptr_buffer_load_lds">,
+ Intrinsic <
[],
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
LLVMQualPointerType<3>, // LDS base offset
|
@llvm/pr-subscribers-clang Author: None (zGoldthorpe) ChangesThis is essentially just a revision of #137678 which only exposes a builtin for the intrinsic The reason for excluding the other intrinsics exposed by the cited PR is because the intrinsics taking a Full diff: https://github.com/llvm/llvm-project/pull/148950.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 4d371a9f7d6db..2a4e8817d6d38 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -164,6 +164,7 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
+TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
//===----------------------------------------------------------------------===//
// Ballot builtins.
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index e6414a623b929..c23c98aa3aaeb 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -36,6 +36,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
+ case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
case AMDGPU::BI__builtin_amdgcn_load_to_lds:
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
constexpr const int SizeIdx = 2;
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
index 8256b61525f9d..197c1096640c0 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
@@ -10,3 +10,7 @@
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
}
+
+void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) {
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
index 5915393ae7f56..8fbffbeea0531 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
@@ -8,3 +8,10 @@ void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local vo
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
}
+
+void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset, int x) {
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, x, vindex, voffset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 3, vindex, voffset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl
index 74944f2d93c72..cb832b9aa4845 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl
@@ -5,6 +5,7 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s
// REQUIRES: amdgpu-registered-target
-void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
+void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int vindex, int offset, int soffset) {
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8016757cf0f3c..6a100eaeefc59 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1932,7 +1932,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
-class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
+class AMDGPUStructPtrBufferLoadLDS :
+ ClangBuiltin<"__builtin_amdgcn_struct_ptr_buffer_load_lds">,
+ Intrinsic <
[],
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
LLVMQualPointerType<3>, // LDS base offset
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM from the LLVM side, don't want to review Clang authoritatively
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great, thanks for picking this up!
Clang side is good too, only real way this could go wrong is the magic type string in the builtins since it hooks into existing behaviour.
This is essentially just a revision of #137678 which only exposes a builtin for the intrinsic
llvm.amdgcn.struct.ptr.buffer.load.lds
, which expects an__amdgpu_buffer_rsrc_t
rather than av4i32
as its first argument.The reason for excluding the other intrinsics exposed by the cited PR is because the intrinsics taking a
v4i32
are legacy and should be deprecated.