From ebebc79cde11e890c47c051c61518d23be2b17fe Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 9 Jul 2025 03:11:56 -0700 Subject: [PATCH 1/7] [SYCL] Use SPIR-V built-in function call for all targets and add `BuiltIn` to the name Before this PR, SPIR-V built-ins were represented as global variables for SPIR/SPIR-V targets and as function calls for other targets in include/sycl/__spirv/spirv_vars.hpp. According to https://github.com/KhronosGroup/SPIRV-LLVM-Translator/blob/main/docs/SPIRVRepresentationInLLVM.rst, a SPIR-V built-in variable can be mapped to either a function call or a global variable, so the function call representation should also work for SPIR-V targets. Add `BuiltIn` to the function names to align with SPIR-V friendly IR. --- .../libspirv/include/libspirv/async/common.h | 13 +- .../libspirv/workitem/get_global_offset.h | 4 +- .../libspirv/workitem/get_global_size.h | 4 +- .../include/libspirv/workitem/get_group_id.h | 4 +- .../include/libspirv/workitem/get_local_id.h | 4 +- .../libspirv/workitem/get_local_size.h | 4 +- .../workitem/get_max_sub_group_size.h | 2 +- .../libspirv/workitem/get_num_groups.h | 4 +- .../libspirv/workitem/get_num_sub_groups.h | 2 +- .../libspirv/workitem/get_sub_group_id.h | 2 +- .../workitem/get_sub_group_local_id.h | 2 +- .../libspirv/workitem/get_sub_group_size.h | 2 +- .../lib/amdgcn-amdhsa/assert/__assert_fail.ll | 20 +- .../lib/amdgcn-amdhsa/group/collectives.cl | 27 +- .../amdgcn-amdhsa/misc/sub_group_shuffle.cl | 4 +- .../workitem/get_global_offset.ll | 43 +- .../amdgcn-amdhsa/workitem/get_global_size.cl | 18 +- .../amdgcn-amdhsa/workitem/get_local_size.cl | 18 +- .../workitem/get_max_sub_group_size.cl | 2 +- .../amdgcn-amdhsa/workitem/get_num_groups.cl | 54 +- .../workitem/get_sub_group_id.cl | 14 +- .../workitem/get_sub_group_local_id.cl | 2 +- .../lib/amdgcn/workitem/get_global_size.cl | 15 +- .../lib/amdgcn/workitem/get_group_id.cl | 15 +- .../lib/amdgcn/workitem/get_local_id.cl | 15 +- .../lib/amdgcn/workitem/get_local_size.cl | 15 +- 
.../lib/amdgcn/workitem/get_num_groups.cl | 15 +- .../lib/generic/workitem/get_global_id.cl | 27 +- .../lib/generic/workitem/get_global_size.cl | 21 +- .../generic/workitem/get_local_linear_id.cl | 10 +- .../generic/workitem/get_num_sub_groups.cl | 10 +- .../generic/workitem/get_sub_group_size.cl | 16 +- .../native_cpu/workitem/get_global_size.cl | 21 +- .../native_cpu/workitem/get_num_sub_groups.cl | 2 +- .../native_cpu/workitem/get_sub_group_size.cl | 2 +- .../lib/ptx-nvidiacl/group/collectives.cl | 24 +- .../ptx-nvidiacl/workitem/get_global_id.cl | 54 +- .../workitem/get_global_offset.cl | 21 +- .../ptx-nvidiacl/workitem/get_global_size.cl | 21 +- .../lib/ptx-nvidiacl/workitem/get_group_id.cl | 21 +- .../lib/ptx-nvidiacl/workitem/get_local_id.cl | 21 +- .../ptx-nvidiacl/workitem/get_local_size.cl | 21 +- .../workitem/get_max_sub_group_size.cl | 2 +- .../ptx-nvidiacl/workitem/get_num_groups.cl | 21 +- .../ptx-nvidiacl/workitem/get_sub_group_id.cl | 16 +- .../workitem/get_sub_group_local_id.cl | 2 +- .../lib/r600/workitem/get_global_offset.cl | 36 +- .../lib/r600/workitem/get_global_size.cl | 15 +- .../lib/r600/workitem/get_group_id.cl | 15 +- .../lib/r600/workitem/get_local_id.cl | 15 +- .../lib/r600/workitem/get_local_size.cl | 15 +- .../lib/r600/workitem/get_num_groups.cl | 15 +- libdevice/crt_wrapper.cpp | 74 +- libdevice/include/sanitizer_utils.hpp | 18 +- libdevice/itt_compiler_wrappers.cpp | 30 +- libdevice/nativecpu_utils.cpp | 20 +- libdevice/sanitizer/asan_rtl.cpp | 12 +- libdevice/sanitizer/msan_rtl.cpp | 17 +- libdevice/sanitizer/tsan_rtl.cpp | 21 +- libdevice/spirv_vars.h | 76 +- llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp | 52 +- llvm/lib/SYCLLowerIR/LowerWGScope.cpp | 23 +- llvm/test/SYCLLowerIR/convergent.ll | 12 +- .../sycl-post-link/sycl-post-link-test.ll | 14 +- sycl/include/sycl/__spirv/spirv_vars.hpp | 188 +--- sycl/include/sycl/detail/helpers.hpp | 38 +- .../ext/oneapi/experimental/ballot_group.hpp | 2 +- 
.../oneapi/experimental/fixed_size_group.hpp | 6 +- .../experimental/opportunistic_group.hpp | 2 +- .../ext/oneapi/experimental/tangle_group.hpp | 2 +- .../sycl/ext/oneapi/sub_group_mask.hpp | 2 +- sycl/include/sycl/group.hpp | 18 +- sycl/include/sycl/nd_item.hpp | 14 +- sycl/include/sycl/stl_wrappers/assert.h | 12 +- sycl/include/sycl/stl_wrappers/cassert | 8 +- sycl/include/sycl/sub_group.hpp | 10 +- .../esimd/spirv_intrins_trans.cpp | 12 +- .../properties_kernel_sub_group_size.cpp | 79 +- .../properties_kernel_work_group_size.cpp | 9 +- ...properties_kernel_work_group_size_hint.cpp | 9 +- sycl/test/check_device_code/group_barrier.cpp | 14 +- sycl/test/check_device_code/group_load.cpp | 455 ++++---- .../group_load_store_alignment.cpp | 4 +- .../group_load_store_native_key.cpp | 37 +- sycl/test/check_device_code/group_store.cpp | 992 ++++++------------ .../extensions/bindless_images_SPIRV_inst.cpp | 5 +- sycl/test/include_deps/sycl_accessor.hpp.cpp | 2 +- sycl/test/include_deps/sycl_buffer.hpp.cpp | 2 +- .../include_deps/sycl_detail_core.hpp.cpp | 2 +- 89 files changed, 1280 insertions(+), 1781 deletions(-) diff --git a/libclc/libspirv/include/libspirv/async/common.h b/libclc/libspirv/include/libspirv/async/common.h index 824af8e8abf46..a024eaf0c4437 100644 --- a/libclc/libspirv/include/libspirv/async/common.h +++ b/libclc/libspirv/include/libspirv/async/common.h @@ -10,12 +10,13 @@ #define CLC_ASYNC_COMMON #define SET_GROUP_SIZE_AND_ID(SIZE, ID) \ - SIZE = __spirv_WorkgroupSize_x() * __spirv_WorkgroupSize_y() * \ - __spirv_WorkgroupSize_z(); \ - ID = (__spirv_WorkgroupSize_y() * __spirv_WorkgroupSize_x() * \ - __spirv_LocalInvocationId_z()) + \ - (__spirv_WorkgroupSize_x() * __spirv_LocalInvocationId_y()) + \ - __spirv_LocalInvocationId_x(); + SIZE = __spirv_BuiltInWorkgroupSize(0) * __spirv_BuiltInWorkgroupSize(1) * \ + __spirv_BuiltInWorkgroupSize(2); \ + ID = (__spirv_BuiltInWorkgroupSize(1) * __spirv_BuiltInWorkgroupSize(0) * \ + 
__spirv_BuiltInLocalInvocationId(2)) + \ + (__spirv_BuiltInWorkgroupSize(0) * \ + __spirv_BuiltInLocalInvocationId(1)) + \ + __spirv_BuiltInLocalInvocationId(0); // Macro used by all data types, for generic and nvidia, for async copy when // arch < sm80 diff --git a/libclc/libspirv/include/libspirv/workitem/get_global_offset.h b/libclc/libspirv/include/libspirv/workitem/get_global_offset.h index be1242cb71101..e3dac528c960b 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_global_offset.h +++ b/libclc/libspirv/include/libspirv/workitem/get_global_offset.h @@ -6,6 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL _CLC_OVERLOAD size_t __spirv_GlobalOffset_x(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_GlobalOffset_y(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_GlobalOffset_z(); +_CLC_DECL _CLC_OVERLOAD size_t __spirv_BuiltInGlobalOffset(int); diff --git a/libclc/libspirv/include/libspirv/workitem/get_global_size.h b/libclc/libspirv/include/libspirv/workitem/get_global_size.h index 8322a29ebcd4a..0fbcdd6e48323 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_global_size.h +++ b/libclc/libspirv/include/libspirv/workitem/get_global_size.h @@ -6,6 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL _CLC_OVERLOAD size_t __spirv_GlobalSize_x(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_GlobalSize_y(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_GlobalSize_z(); +_CLC_DECL _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int); diff --git a/libclc/libspirv/include/libspirv/workitem/get_group_id.h b/libclc/libspirv/include/libspirv/workitem/get_group_id.h index 1c0010442a740..5091cd6d87f73 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_group_id.h +++ b/libclc/libspirv/include/libspirv/workitem/get_group_id.h @@ -6,6 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL _CLC_OVERLOAD size_t 
__spirv_WorkgroupId_x(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_WorkgroupId_y(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_WorkgroupId_z(); +_CLC_DECL _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupId(int); diff --git a/libclc/libspirv/include/libspirv/workitem/get_local_id.h b/libclc/libspirv/include/libspirv/workitem/get_local_id.h index 0a89d7d84a5e0..e3e0b0e28319e 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_local_id.h +++ b/libclc/libspirv/include/libspirv/workitem/get_local_id.h @@ -6,6 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL _CLC_OVERLOAD size_t __spirv_LocalInvocationId_x(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_LocalInvocationId_y(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_LocalInvocationId_z(); +_CLC_DECL _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationId(int); diff --git a/libclc/libspirv/include/libspirv/workitem/get_local_size.h b/libclc/libspirv/include/libspirv/workitem/get_local_size.h index 5699de48aca48..3a648a9927476 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_local_size.h +++ b/libclc/libspirv/include/libspirv/workitem/get_local_size.h @@ -6,6 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL _CLC_OVERLOAD size_t __spirv_WorkgroupSize_x(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_WorkgroupSize_y(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_WorkgroupSize_z(); +_CLC_DECL _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int); diff --git a/libclc/libspirv/include/libspirv/workitem/get_max_sub_group_size.h b/libclc/libspirv/include/libspirv/workitem/get_max_sub_group_size.h index 3befd9abae240..97476359463ae 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_max_sub_group_size.h +++ b/libclc/libspirv/include/libspirv/workitem/get_max_sub_group_size.h @@ -6,4 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DEF _CLC_OVERLOAD uint 
__spirv_SubgroupMaxSize(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupMaxSize(); diff --git a/libclc/libspirv/include/libspirv/workitem/get_num_groups.h b/libclc/libspirv/include/libspirv/workitem/get_num_groups.h index 4e3a24d5f78fb..b83e1ee1dd40e 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_num_groups.h +++ b/libclc/libspirv/include/libspirv/workitem/get_num_groups.h @@ -6,6 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL _CLC_OVERLOAD size_t __spirv_NumWorkgroups_x(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_NumWorkgroups_y(); -_CLC_DECL _CLC_OVERLOAD size_t __spirv_NumWorkgroups_z(); +_CLC_DECL _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int); diff --git a/libclc/libspirv/include/libspirv/workitem/get_num_sub_groups.h b/libclc/libspirv/include/libspirv/workitem/get_num_sub_groups.h index c6341dd6c63f4..e8820897aa7ae 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_num_sub_groups.h +++ b/libclc/libspirv/include/libspirv/workitem/get_num_sub_groups.h @@ -6,4 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DEF _CLC_OVERLOAD uint __spirv_NumSubgroups(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInNumSubgroups(); diff --git a/libclc/libspirv/include/libspirv/workitem/get_sub_group_id.h b/libclc/libspirv/include/libspirv/workitem/get_sub_group_id.h index 47f4e0c5afa7a..b438a3c71958d 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_sub_group_id.h +++ b/libclc/libspirv/include/libspirv/workitem/get_sub_group_id.h @@ -6,4 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupId(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupId(); diff --git a/libclc/libspirv/include/libspirv/workitem/get_sub_group_local_id.h b/libclc/libspirv/include/libspirv/workitem/get_sub_group_local_id.h index f69319bcfad5a..e15dd8760eb72 100644 --- 
a/libclc/libspirv/include/libspirv/workitem/get_sub_group_local_id.h +++ b/libclc/libspirv/include/libspirv/workitem/get_sub_group_local_id.h @@ -6,4 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupLocalInvocationId(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupLocalInvocationId(); diff --git a/libclc/libspirv/include/libspirv/workitem/get_sub_group_size.h b/libclc/libspirv/include/libspirv/workitem/get_sub_group_size.h index 59066301ce6a1..e1613afe09522 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_sub_group_size.h +++ b/libclc/libspirv/include/libspirv/workitem/get_sub_group_size.h @@ -6,4 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupSize(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupSize(); diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/assert/__assert_fail.ll b/libclc/libspirv/lib/amdgcn-amdhsa/assert/__assert_fail.ll index 3c523f6d9f9dd..fc41e1b972fe5 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/assert/__assert_fail.ll +++ b/libclc/libspirv/lib/amdgcn-amdhsa/assert/__assert_fail.ll @@ -14,13 +14,9 @@ declare i64 @__ockl_fprintf_stderr_begin() local_unnamed_addr declare i64 @__ockl_fprintf_append_string_n(i64, i8* readonly, i64, i32) local_unnamed_addr declare i64 @__ockl_fprintf_append_args(i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) local_unnamed_addr -declare dso_local i64 @_Z28__spirv_GlobalInvocationId_xv() local_unnamed_addr -declare dso_local i64 @_Z28__spirv_GlobalInvocationId_yv() local_unnamed_addr -declare dso_local i64 @_Z28__spirv_GlobalInvocationId_zv() local_unnamed_addr +declare dso_local i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32) local_unnamed_addr -declare dso_local i64 @_Z27__spirv_LocalInvocationId_xv() local_unnamed_addr -declare dso_local i64 @_Z27__spirv_LocalInvocationId_yv() local_unnamed_addr -declare dso_local 
i64 @_Z27__spirv_LocalInvocationId_zv() local_unnamed_addr +declare dso_local i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32) local_unnamed_addr define dso_local hidden noundef i64 @__strlen_assert(i8* noundef %str) local_unnamed_addr { entry: @@ -50,12 +46,12 @@ entry: %msg.3 = call i64 @__ockl_fprintf_append_args(i64 %msg.2, i32 1, i64 %line.i64, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0) %len.func = call i64 @__strlen_assert(i8* %function) %msg.4 = call i64 @__ockl_fprintf_append_string_n(i64 %msg.3, i8* readonly %function, i64 %len.func, i32 0) - %gidx = tail call i64 @_Z28__spirv_GlobalInvocationId_xv() - %gidy = tail call i64 @_Z28__spirv_GlobalInvocationId_yv() - %gidz = tail call i64 @_Z28__spirv_GlobalInvocationId_zv() - %lidx = tail call i64 @_Z27__spirv_LocalInvocationId_xv() - %lidy = tail call i64 @_Z27__spirv_LocalInvocationId_yv() - %lidz = tail call i64 @_Z27__spirv_LocalInvocationId_zv() + %gidx = tail call i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0) + %gidy = tail call i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1) + %gidz = tail call i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 2) + %lidx = tail call i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 0) + %lidy = tail call i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 1) + %lidz = tail call i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 2) %msg.5 = call i64 @__ockl_fprintf_append_args(i64 %msg.4, i32 6, i64 %gidx, i64 %gidy, i64 %gidz, i64 %lidx, i64 %lidy, i64 %lidz, i64 0, i32 0) %len.assertion = call i64 @__strlen_assert(i8* %assertion) %msg.6 = call i64 @__ockl_fprintf_append_string_n(i64 %msg.4, i8* readonly %assertion, i64 %len.assertion, i32 1) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl b/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl index 4bc29cb0b52c8..4a25ee769b599 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl @@ -51,9 +51,9 @@ 
__clc__get_group_scratch_double() __asm("__clc__get_group_scratch_double"); #define __CLC_LOGICAL_AND(x, y) (x && y) #define __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, IDENTITY) \ - uint sg_lid = __spirv_SubgroupLocalInvocationId(); \ + uint sg_lid = __spirv_BuiltInSubgroupLocalInvocationId(); \ /* Can't use XOR/butterfly shuffles; some lanes may be inactive */ \ - for (int o = 1; o < __spirv_SubgroupMaxSize(); o *= 2) { \ + for (int o = 1; o < __spirv_BuiltInSubgroupMaxSize(); o *= 2) { \ TYPE contribution = __spirv_SubgroupShuffleUpINTEL(x, x, o); \ bool inactive = (sg_lid < o); \ contribution = (inactive) ? IDENTITY : contribution; \ @@ -62,7 +62,8 @@ __clc__get_group_scratch_double() __asm("__clc__get_group_scratch_double"); /* For Reduce, broadcast result from highest active lane */ \ TYPE result; \ if (op == Reduce) { \ - result = __spirv_SubgroupShuffleINTEL(x, __spirv_SubgroupSize() - 1); \ + result = \ + __spirv_SubgroupShuffleINTEL(x, __spirv_BuiltInSubgroupSize() - 1); \ *carry = result; \ } /* For InclusiveScan, use results as computed */ \ else if (op == InclusiveScan) { \ @@ -171,7 +172,7 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true) #define __CLC_GROUP_COLLECTIVE_INNER(SPIRV_NAME, CLC_NAME, OP, TYPE, IDENTITY) \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __CLC_APPEND( \ - __spirv_Group, SPIRV_NAME)(int scope, int op, TYPE x) { \ + __spirv_Group, SPIRV_NAME)(int scope, int op, TYPE x) { \ TYPE carry = IDENTITY; \ /* Perform GroupOperation within sub-group */ \ TYPE sg_x = __CLC_APPEND(__clc__Subgroup, CLC_NAME)(op, x, &carry); \ @@ -179,10 +180,10 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true) return sg_x; \ } \ __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)(); \ - uint sg_id = __spirv_SubgroupId(); \ - uint num_sg = __spirv_NumSubgroups(); \ - uint sg_lid = __spirv_SubgroupLocalInvocationId(); \ - uint sg_size = __spirv_SubgroupSize(); \ + uint sg_id = 
__spirv_BuiltInSubgroupId(); \ + uint num_sg = __spirv_BuiltInNumSubgroups(); \ + uint sg_lid = __spirv_BuiltInSubgroupLocalInvocationId(); \ + uint sg_size = __spirv_BuiltInSubgroupSize(); \ /* Share carry values across sub-groups */ \ if (sg_lid == sg_size - 1) { \ scratch[sg_id] = carry; \ @@ -329,15 +330,15 @@ __CLC_GROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true) #undef __CLC_MUL long __clc__2d_to_linear_local_id(ulong2 id) { - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); return (id.y * size_x + id.x); } long __clc__3d_to_linear_local_id(ulong3 id) { - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - size_t size_z = __spirv_WorkgroupSize_z(); + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); return (id.z * size_y * size_x + id.y * size_x + id.x); } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl b/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl index 59498a433d84f..a8f31e2ccdf71 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl @@ -8,8 +8,8 @@ #include -#define SELF __spirv_SubgroupLocalInvocationId(); -#define SUBGROUP_SIZE __spirv_SubgroupMaxSize() +#define SELF __spirv_BuiltInSubgroupLocalInvocationId(); +#define SUBGROUP_SIZE __spirv_BuiltInSubgroupMaxSize() // Shuffle _CLC_OVERLOAD _CLC_DEF int diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_offset.ll b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_offset.ll index 0ed1864a499cb..af744aa984344 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_offset.ll +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_offset.ll @@ -9,28 +9,35 @@ ; 
Function Attrs: nounwind readnone speculatable declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset() -define hidden i64 @_Z22__spirv_GlobalOffset_xv() nounwind alwaysinline { +define hidden i64 @_Z27__spirv_BuiltInGlobalOffseti(i32 %dim) nounwind alwaysinline { entry: + switch i32 %dim, label %return [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb3 + ] + +sw.bb: ; preds = %entry %0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset() %1 = load i32, i32 addrspace(5)* %0, align 4 %zext = zext i32 %1 to i64 - ret i64 %zext -} + br label %return -define hidden i64 @_Z22__spirv_GlobalOffset_yv() nounwind alwaysinline { -entry: - %0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset() - %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 1 - %1 = load i32, i32 addrspace(5)* %arrayidx, align 4 - %zext = zext i32 %1 to i64 - ret i64 %zext -} +sw.bb1: ; preds = %entry + %2 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset() + %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %2, i64 1 + %3 = load i32, i32 addrspace(5)* %arrayidx, align 4 + %zext2 = zext i32 %3 to i64 + br label %return -define hidden i64 @_Z22__spirv_GlobalOffset_zv() nounwind alwaysinline { -entry: - %0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset() - %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2 - %1 = load i32, i32 addrspace(5)* %arrayidx, align 4 - %zext = zext i32 %1 to i64 - ret i64 %zext +sw.bb3: ; preds = %entry + %4 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset() + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(5)* %4, i64 2 + %5 = load i32, i32 addrspace(5)* %arrayidx2, align 4 + %zext3 = zext i32 %5 to i64 + br label %return + +return: ; preds = %entry, %sw.bb3, %sw.bb1, %sw.bb + %retval = phi i64 [ %zext, %sw.bb ], [ %zext2, %sw.bb1 ], [ %zext3, %sw.bb3 ], [ 0, %entry ] + ret i64 %retval } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl 
b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl index 9988816374023..0096cb9873969 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl @@ -24,17 +24,21 @@ CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr"); #endif -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { + switch (dim) { + case 0: { CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); return ptr[3]; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_y() { + } + case 1: { CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); return ptr[4]; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_z() { + } + case 2: { CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); return ptr[5]; + } + default: + return 1; + } } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl index eeb7cefab8c14..29ecd8d099cbe 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl @@ -26,17 +26,21 @@ __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr"); // Mimic `EmitAMDGPUWorkGroupSize` in `clang/lib/CodeGen/CGBuiltin.cpp`. 
-_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int dim) { + switch (dim) { + case 0: { CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr(); return ptr[2]; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_y() { + } + case 1: { CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr(); return ptr[3]; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_z() { + } + case 2: { CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr(); return ptr[4]; + } + default: + return 1; + } } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_max_sub_group_size.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_max_sub_group_size.cl index 04c9467c088bd..7a525e1ef06bd 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_max_sub_group_size.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_max_sub_group_size.cl @@ -14,7 +14,7 @@ // 32. extern constant unsigned char __oclc_wavefrontsize64; -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupMaxSize() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupMaxSize() { if (__oclc_wavefrontsize64 == 1) { return 64; } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl index a2977add7ac45..10aa90701f2b0 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl @@ -8,32 +8,36 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_x() { - size_t global_size = __spirv_GlobalSize_x(); - size_t local_size = __spirv_WorkgroupSize_x(); - size_t num_groups = global_size / local_size; - if (global_size % local_size != 0) { - num_groups++; +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { + switch (dim) { + case 0: { + size_t global_size = __spirv_BuiltInGlobalSize(0); + size_t local_size = __spirv_BuiltInWorkgroupSize(0); + size_t 
num_groups = global_size / local_size; + if (global_size % local_size != 0) { + num_groups++; + } + return num_groups; } - return num_groups; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_y() { - size_t global_size = __spirv_GlobalSize_y(); - size_t local_size = __spirv_WorkgroupSize_y(); - size_t num_groups = global_size / local_size; - if (global_size % local_size != 0) { - num_groups++; + case 1: { + size_t global_size = __spirv_BuiltInGlobalSize(1); + size_t local_size = __spirv_BuiltInWorkgroupSize(1); + size_t num_groups = global_size / local_size; + if (global_size % local_size != 0) { + num_groups++; + } + return num_groups; } - return num_groups; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_z() { - size_t global_size = __spirv_GlobalSize_z(); - size_t local_size = __spirv_WorkgroupSize_z(); - size_t num_groups = global_size / local_size; - if (global_size % local_size != 0) { - num_groups++; + case 2: { + size_t global_size = __spirv_BuiltInGlobalSize(2); + size_t local_size = __spirv_BuiltInWorkgroupSize(2); + size_t num_groups = global_size / local_size; + if (global_size % local_size != 0) { + num_groups++; + } + return num_groups; + } + default: + return 0; } - return num_groups; } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_id.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_id.cl index 4651da12be006..440fd91b3f4a3 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_id.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_id.cl @@ -8,12 +8,12 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupId() { - size_t id_x = __spirv_LocalInvocationId_x(); - size_t id_y = __spirv_LocalInvocationId_y(); - size_t id_z = __spirv_LocalInvocationId_z(); - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - uint sg_size = __spirv_SubgroupMaxSize(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupId() { + size_t id_x = 
__spirv_BuiltInLocalInvocationId(0); + size_t id_y = __spirv_BuiltInLocalInvocationId(1); + size_t id_z = __spirv_BuiltInLocalInvocationId(2); + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + uint sg_size = __spirv_BuiltInSubgroupMaxSize(); return (id_z * size_y * size_x + id_y * size_x + id_x) / sg_size; } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_local_id.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_local_id.cl index f0b2559c43997..609447b20cab0 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_local_id.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_local_id.cl @@ -8,6 +8,6 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupLocalInvocationId() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupLocalInvocationId() { return __builtin_amdgcn_mbcnt_hi(-1, __builtin_amdgcn_mbcnt_lo(-1, 0)); } diff --git a/libclc/libspirv/lib/amdgcn/workitem/get_global_size.cl b/libclc/libspirv/lib/amdgcn/workitem/get_global_size.cl index 56011f0494e42..94c4a6fbba67d 100644 --- a/libclc/libspirv/lib/amdgcn/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/amdgcn/workitem/get_global_size.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { + switch (dim) { + case 0: return __builtin_amdgcn_grid_size_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_y() { + case 1: return __builtin_amdgcn_grid_size_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_z() { + case 2: return __builtin_amdgcn_grid_size_z(); + default: + return 1; + } } diff --git a/libclc/libspirv/lib/amdgcn/workitem/get_group_id.cl b/libclc/libspirv/lib/amdgcn/workitem/get_group_id.cl index d3bc41c5499cb..1bd2c70194de5 100644 --- a/libclc/libspirv/lib/amdgcn/workitem/get_group_id.cl +++ b/libclc/libspirv/lib/amdgcn/workitem/get_group_id.cl @@ -8,14 +8,15 @@ 
#include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupId_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupId(int dim) { + switch (dim) { + case 0: return __builtin_amdgcn_workgroup_id_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupId_y() { + case 1: return __builtin_amdgcn_workgroup_id_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupId_z() { + case 2: return __builtin_amdgcn_workgroup_id_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/amdgcn/workitem/get_local_id.cl b/libclc/libspirv/lib/amdgcn/workitem/get_local_id.cl index 5fa0905b76c31..b19bcab5c31d6 100644 --- a/libclc/libspirv/lib/amdgcn/workitem/get_local_id.cl +++ b/libclc/libspirv/lib/amdgcn/workitem/get_local_id.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_LocalInvocationId_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationId(int dim) { + switch (dim) { + case 0: return __builtin_amdgcn_workitem_id_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_LocalInvocationId_y() { + case 1: return __builtin_amdgcn_workitem_id_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_LocalInvocationId_z() { + case 2: return __builtin_amdgcn_workitem_id_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl b/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl index 4ca0d3fe5ea37..ce71723dd3317 100644 --- a/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl +++ b/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl @@ -12,14 +12,15 @@ uint __clc_amdgcn_get_local_size_x(void) __asm("llvm.r600.read.local.size.x"); uint __clc_amdgcn_get_local_size_y(void) __asm("llvm.r600.read.local.size.y"); uint __clc_amdgcn_get_local_size_z(void) __asm("llvm.r600.read.local.size.z"); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int dim) { + switch (dim) { + case 0: return __clc_amdgcn_get_local_size_x(); -} - -_CLC_DEF 
_CLC_OVERLOAD size_t __spirv_WorkgroupSize_y() { + case 1: return __clc_amdgcn_get_local_size_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_z() { + case 2: return __clc_amdgcn_get_local_size_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl b/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl index ec95b3cffb487..beb54a8bad199 100644 --- a/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl @@ -12,14 +12,15 @@ uint __clc_amdgcn_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x"); uint __clc_amdgcn_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y"); uint __clc_amdgcn_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z"); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { + switch (dim) { + case 0: return __clc_amdgcn_get_num_groups_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_y() { + case 1: return __clc_amdgcn_get_num_groups_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_z() { + case 2: return __clc_amdgcn_get_num_groups_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/generic/workitem/get_global_id.cl b/libclc/libspirv/lib/generic/workitem/get_global_id.cl index 5a3ed6fdc3aa3..11f465e76a563 100644 --- a/libclc/libspirv/lib/generic/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/generic/workitem/get_global_id.cl @@ -8,17 +8,18 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_x() { - return __spirv_WorkgroupId_x() * __spirv_WorkgroupSize_x() + __spirv_LocalInvocationId_x() + - __spirv_GlobalOffset_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_y() { - return __spirv_WorkgroupId_y() * __spirv_WorkgroupSize_y() + __spirv_LocalInvocationId_y() + - __spirv_GlobalOffset_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_z() { - return 
__spirv_WorkgroupId_z() * __spirv_WorkgroupSize_z() + __spirv_LocalInvocationId_z() + - __spirv_GlobalOffset_z(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { + switch (dim) { + case 0: + return __spirv_BuiltInWorkgroupId(0) * __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(0) + __spirv_BuiltInGlobalOffset(0); + case 1: + return __spirv_BuiltInWorkgroupId(1) * __spirv_BuiltInWorkgroupSize(1) + + __spirv_BuiltInLocalInvocationId(1) + __spirv_BuiltInGlobalOffset(1); + case 2: + return __spirv_BuiltInWorkgroupId(2) * __spirv_BuiltInWorkgroupSize(2) + + __spirv_BuiltInLocalInvocationId(2) + __spirv_BuiltInGlobalOffset(2); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/generic/workitem/get_global_size.cl b/libclc/libspirv/lib/generic/workitem/get_global_size.cl index 80975d920c262..708f0592487c0 100644 --- a/libclc/libspirv/lib/generic/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/generic/workitem/get_global_size.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_x() { - return __spirv_NumWorkgroups_x() * __spirv_WorkgroupSize_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_y() { - return __spirv_NumWorkgroups_y() * __spirv_WorkgroupSize_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_z() { - return __spirv_NumWorkgroups_z() * __spirv_WorkgroupSize_z(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { + switch (dim) { + case 0: + return __spirv_BuiltInNumWorkgroups(0) * __spirv_BuiltInWorkgroupSize(0); + case 1: + return __spirv_BuiltInNumWorkgroups(1) * __spirv_BuiltInWorkgroupSize(1); + case 2: + return __spirv_BuiltInNumWorkgroups(2) * __spirv_BuiltInWorkgroupSize(2); + default: + return 1; + } } diff --git a/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl b/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl index 6b61944476842..44aa37f011777 100644 --- 
a/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl +++ b/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl @@ -8,9 +8,9 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_LocalInvocationIndex() { - return __spirv_LocalInvocationId_z() * __spirv_WorkgroupSize_y() * - __spirv_WorkgroupSize_x() + - __spirv_LocalInvocationId_y() * __spirv_WorkgroupSize_x() + - __spirv_LocalInvocationId_x(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationIndex() { + return __spirv_BuiltInLocalInvocationId(2) * __spirv_BuiltInWorkgroupSize(1) * + __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(1) * __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(0); } diff --git a/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl index 30d9bc9b2d8be..63f7c3a93ca90 100644 --- a/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl +++ b/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl @@ -8,11 +8,11 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_NumSubgroups() { - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - size_t size_z = __spirv_WorkgroupSize_z(); - uint sg_size = __spirv_SubgroupMaxSize(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInNumSubgroups() { + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); + uint sg_size = __spirv_BuiltInSubgroupMaxSize(); size_t linear_size = size_z * size_y * size_x; return (uint)((linear_size + sg_size - 1) / sg_size); } diff --git a/libclc/libspirv/lib/generic/workitem/get_sub_group_size.cl b/libclc/libspirv/lib/generic/workitem/get_sub_group_size.cl index 82d5f9433cce2..d0cb4acc8a60f 100644 --- a/libclc/libspirv/lib/generic/workitem/get_sub_group_size.cl +++ b/libclc/libspirv/lib/generic/workitem/get_sub_group_size.cl @@ -8,15 +8,15 @@ #include -_CLC_DEF 
_CLC_OVERLOAD uint __spirv_SubgroupSize() { - if (__spirv_SubgroupId() != __spirv_NumSubgroups() - 1) { - return __spirv_SubgroupMaxSize(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupSize() { + if (__spirv_BuiltInSubgroupId() != __spirv_BuiltInNumSubgroups() - 1) { + return __spirv_BuiltInSubgroupMaxSize(); } - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - size_t size_z = __spirv_WorkgroupSize_z(); + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); size_t linear_size = size_z * size_y * size_x; - size_t uniform_groups = __spirv_NumSubgroups() - 1; - size_t uniform_size = __spirv_SubgroupMaxSize() * uniform_groups; + size_t uniform_groups = __spirv_BuiltInNumSubgroups() - 1; + size_t uniform_size = __spirv_BuiltInSubgroupMaxSize() * uniform_groups; return linear_size - uniform_size; } diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl b/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl index 07bbc6102e6f2..fa4afed4f44c3 100644 --- a/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl @@ -10,14 +10,15 @@ ulong __mux_get_global_size(int); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_x() { - return __mux_get_global_size(0); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_y() { - return __mux_get_global_size(1); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_z() { - return __mux_get_global_size(2); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { + switch (dim) { + case 0: + return __mux_get_global_size(0); + case 1: + return __mux_get_global_size(1); + case 2: + return __mux_get_global_size(2); + default: + return 1; + } } diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/native_cpu/workitem/get_num_sub_groups.cl index 
8ac4aa5565c73..22c4e754b160f 100644 --- a/libclc/libspirv/lib/native_cpu/workitem/get_num_sub_groups.cl +++ b/libclc/libspirv/lib/native_cpu/workitem/get_num_sub_groups.cl @@ -10,6 +10,6 @@ uint __mux_get_num_sub_groups(); -_CLC_DEF _CLC_OVERLOAD uint __spirv_NumSubgroups() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInNumSubgroups() { return __mux_get_num_sub_groups(); } diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_sub_group_size.cl b/libclc/libspirv/lib/native_cpu/workitem/get_sub_group_size.cl index 3ca3890033271..2b08195964214 100644 --- a/libclc/libspirv/lib/native_cpu/workitem/get_sub_group_size.cl +++ b/libclc/libspirv/lib/native_cpu/workitem/get_sub_group_size.cl @@ -10,6 +10,6 @@ uint __mux_get_sub_group_size(); -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupSize() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupSize() { return __mux_get_sub_group_size(); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl b/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl index 486dc74a9b4b2..64f0d8354f2cf 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl @@ -298,9 +298,9 @@ complex_double __muldc3(double a, double b, double c, double d) { #define __CLC_COMPLEX_MUL(x, y) __clc_complex_mul(x, y) #define __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, IDENTITY) \ - uint sg_lid = __spirv_SubgroupLocalInvocationId(); \ + uint sg_lid = __spirv_BuiltInSubgroupLocalInvocationId(); \ /* Can't use XOR/butterfly shuffles; some lanes may be inactive */ \ - for (int o = 1; o < __spirv_SubgroupMaxSize(); o *= 2) { \ + for (int o = 1; o < __spirv_BuiltInSubgroupMaxSize(); o *= 2) { \ TYPE contribution = __clc__SubgroupShuffleUp(x, o); \ bool inactive = (sg_lid < o); \ contribution = (inactive) ? 
IDENTITY : contribution; \ @@ -309,7 +309,7 @@ complex_double __muldc3(double a, double b, double c, double d) { /* For Reduce, broadcast result from highest active lane */ \ TYPE result; \ if (op == Reduce) { \ - result = __clc__SubgroupShuffle(x, __spirv_SubgroupSize() - 1); \ + result = __clc__SubgroupShuffle(x, __spirv_BuiltInSubgroupSize() - 1); \ *carry = result; \ } /* For InclusiveScan, use results as computed */ \ else if (op == InclusiveScan) { \ @@ -441,10 +441,10 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true) return sg_x; \ } \ __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)(); \ - uint sg_id = __spirv_SubgroupId(); \ - uint num_sg = __spirv_NumSubgroups(); \ - uint sg_lid = __spirv_SubgroupLocalInvocationId(); \ - uint sg_size = __spirv_SubgroupSize(); \ + uint sg_id = __spirv_BuiltInSubgroupId(); \ + uint num_sg = __spirv_BuiltInNumSubgroups(); \ + uint sg_lid = __spirv_BuiltInSubgroupLocalInvocationId(); \ + uint sg_size = __spirv_BuiltInSubgroupSize(); \ /* Share carry values across sub-groups */ \ if (sg_lid == sg_size - 1) { \ scratch[sg_id] = carry; \ @@ -613,15 +613,15 @@ __CLC_GROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true) #undef __CLC_MUL long __clc__2d_to_linear_local_id(ulong2 id) { - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); return (id.y * size_x + id.x); } long __clc__3d_to_linear_local_id(ulong3 id) { - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - size_t size_z = __spirv_WorkgroupSize_z(); + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); return (id.z * size_y * size_x + id.y * size_x + id.x); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl 
b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl index c87d2679fd28e..3747f85030043 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl @@ -10,29 +10,39 @@ extern int __nvvm_reflect_ocl(constant char *); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_x() { - if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__spirv_WorkgroupId_x() * (uint)__spirv_WorkgroupSize_x() + - (uint)__spirv_LocalInvocationId_x() + (uint)__spirv_GlobalOffset_x(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { + switch (dim) { + case 0: { + if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { + return (uint)__spirv_BuiltInWorkgroupId(0) * + (uint)__spirv_BuiltInWorkgroupSize(0) + + (uint)__spirv_BuiltInLocalInvocationId(0) + + (uint)__spirv_BuiltInGlobalOffset(0); + } + return __spirv_BuiltInWorkgroupId(0) * __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(0) + __spirv_BuiltInGlobalOffset(0); } - return __spirv_WorkgroupId_x() * __spirv_WorkgroupSize_x() + - __spirv_LocalInvocationId_x() + __spirv_GlobalOffset_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_y() { - if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__spirv_WorkgroupId_y() * (uint)__spirv_WorkgroupSize_y() + - (uint)__spirv_LocalInvocationId_y() + (uint)__spirv_GlobalOffset_y(); + case 1: { + if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { + return (uint)__spirv_BuiltInWorkgroupId(1) * + (uint)__spirv_BuiltInWorkgroupSize(1) + + (uint)__spirv_BuiltInLocalInvocationId(1) + + (uint)__spirv_BuiltInGlobalOffset(1); + } + return __spirv_BuiltInWorkgroupId(1) * __spirv_BuiltInWorkgroupSize(1) + + __spirv_BuiltInLocalInvocationId(1) + __spirv_BuiltInGlobalOffset(1); } - return __spirv_WorkgroupId_y() * __spirv_WorkgroupSize_y() + - __spirv_LocalInvocationId_y() + __spirv_GlobalOffset_y(); -} - 
-_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_z() { - if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__spirv_WorkgroupId_z() * (uint)__spirv_WorkgroupSize_z() + - (uint)__spirv_LocalInvocationId_z() + (uint)__spirv_GlobalOffset_z(); + case 2: { + if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { + return (uint)__spirv_BuiltInWorkgroupId(2) * + (uint)__spirv_BuiltInWorkgroupSize(2) + + (uint)__spirv_BuiltInLocalInvocationId(2) + + (uint)__spirv_BuiltInGlobalOffset(2); + } + return __spirv_BuiltInWorkgroupId(2) * __spirv_BuiltInWorkgroupSize(2) + + __spirv_BuiltInLocalInvocationId(2) + __spirv_BuiltInGlobalOffset(2); + } + default: + return 0; } - return __spirv_WorkgroupId_z() * __spirv_WorkgroupSize_z() + - __spirv_LocalInvocationId_z() + __spirv_GlobalOffset_z(); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_offset.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_offset.cl index 053ab45875068..d8b9c96443780 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_offset.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_offset.cl @@ -10,14 +10,15 @@ // Compiler support is required to provide global offset on NVPTX. 
-_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalOffset_x() { - return __builtin_ptx_implicit_offset()[0]; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalOffset_y() { - return __builtin_ptx_implicit_offset()[1]; -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalOffset_z() { - return __builtin_ptx_implicit_offset()[2]; +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalOffset(int dim) { + switch (dim) { + case 0: + return __builtin_ptx_implicit_offset()[0]; + case 1: + return __builtin_ptx_implicit_offset()[1]; + case 2: + return __builtin_ptx_implicit_offset()[2]; + default: + return 0; + } } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl index 80975d920c262..708f0592487c0 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_x() { - return __spirv_NumWorkgroups_x() * __spirv_WorkgroupSize_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_y() { - return __spirv_NumWorkgroups_y() * __spirv_WorkgroupSize_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_z() { - return __spirv_NumWorkgroups_z() * __spirv_WorkgroupSize_z(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { + switch (dim) { + case 0: + return __spirv_BuiltInNumWorkgroups(0) * __spirv_BuiltInWorkgroupSize(0); + case 1: + return __spirv_BuiltInNumWorkgroups(1) * __spirv_BuiltInWorkgroupSize(1); + case 2: + return __spirv_BuiltInNumWorkgroups(2) * __spirv_BuiltInWorkgroupSize(2); + default: + return 1; + } } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl index f13fdc8d2c604..7ea58306e8b36 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl @@ -8,14 +8,15 
@@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupId_x() { - return __nvvm_read_ptx_sreg_ctaid_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupId_y() { - return __nvvm_read_ptx_sreg_ctaid_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupId_z() { - return __nvvm_read_ptx_sreg_ctaid_z(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupId(int dim) { + switch (dim) { + case 0: + return __nvvm_read_ptx_sreg_ctaid_x(); + case 1: + return __nvvm_read_ptx_sreg_ctaid_y(); + case 2: + return __nvvm_read_ptx_sreg_ctaid_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl index 996702a3e48e8..2b184d4501b15 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_LocalInvocationId_x() { - return __nvvm_read_ptx_sreg_tid_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_LocalInvocationId_y() { - return __nvvm_read_ptx_sreg_tid_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_LocalInvocationId_z() { - return __nvvm_read_ptx_sreg_tid_z(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationId(int dim) { + switch (dim) { + case 0: + return __nvvm_read_ptx_sreg_tid_x(); + case 1: + return __nvvm_read_ptx_sreg_tid_y(); + case 2: + return __nvvm_read_ptx_sreg_tid_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl index 39011e422e00d..b0b108d897267 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_x() { - return __nvvm_read_ptx_sreg_ntid_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_y() { - 
return __nvvm_read_ptx_sreg_ntid_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_z() { - return __nvvm_read_ptx_sreg_ntid_z(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int dim) { + switch (dim) { + case 0: + return __nvvm_read_ptx_sreg_ntid_x(); + case 1: + return __nvvm_read_ptx_sreg_ntid_y(); + case 2: + return __nvvm_read_ptx_sreg_ntid_z(); + default: + return 1; + } } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl index f7e02cdf8d287..c94c57e64293c 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl @@ -8,7 +8,7 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupMaxSize() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupMaxSize() { return 32; // FIXME: warpsize is defined by NVVM IR but doesn't compile if used here // return __nvvm_read_ptx_sreg_warpsize(); diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl index fa0acb6d87577..b5161378c7241 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_x() { - return __nvvm_read_ptx_sreg_nctaid_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_y() { - return __nvvm_read_ptx_sreg_nctaid_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_z() { - return __nvvm_read_ptx_sreg_nctaid_z(); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { + switch (dim) { + case 0: + return __nvvm_read_ptx_sreg_nctaid_x(); + case 1: + return __nvvm_read_ptx_sreg_nctaid_y(); + case 2: + return __nvvm_read_ptx_sreg_nctaid_z(); + default: + return 0; + } } diff --git 
a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_id.cl index 746b6adbe2b5a..658c90a8139af 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_id.cl @@ -8,15 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupId() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupId() { // sreg.warpid is volatile and doesn't represent virtual warp index // see https://docs.nvidia.com/cuda/parallel-thread-execution/index.html - size_t id_x = __spirv_LocalInvocationId_x(); - size_t id_y = __spirv_LocalInvocationId_y(); - size_t id_z = __spirv_LocalInvocationId_z(); - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - size_t size_z = __spirv_WorkgroupSize_z(); - uint sg_size = __spirv_SubgroupMaxSize(); + size_t id_x = __spirv_BuiltInLocalInvocationId(0); + size_t id_y = __spirv_BuiltInLocalInvocationId(1); + size_t id_z = __spirv_BuiltInLocalInvocationId(2); + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); + uint sg_size = __spirv_BuiltInSubgroupMaxSize(); return (id_z * size_y * size_x + id_y * size_x + id_x) / sg_size; } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl index 92bc9880b4bc3..7d690e9fa1201 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl @@ -8,6 +8,6 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupLocalInvocationId() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupLocalInvocationId() { return __nvvm_read_ptx_sreg_laneid(); } diff --git a/libclc/libspirv/lib/r600/workitem/get_global_offset.cl 
b/libclc/libspirv/lib/r600/workitem/get_global_offset.cl index 8fae5ba02deda..12c6f7fdb48a8 100644 --- a/libclc/libspirv/lib/r600/workitem/get_global_offset.cl +++ b/libclc/libspirv/lib/r600/workitem/get_global_offset.cl @@ -8,23 +8,27 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_GlobalOffset_x() { - __attribute__((address_space(7))) uint * ptr = - (__attribute__((address_space(7))) uint *) - __builtin_r600_implicitarg_ptr(); +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInGlobalOffset(int dim) { + switch (dim) { + case 0: { + __attribute__((address_space(7))) uint *ptr = + (__attribute__((address_space(7))) + uint *)__builtin_r600_implicitarg_ptr(); return ptr[1]; -} - -_CLC_DEF _CLC_OVERLOAD uint __spirv_GlobalOffset_y() { - __attribute__((address_space(7))) uint * ptr = - (__attribute__((address_space(7))) uint *) - __builtin_r600_implicitarg_ptr(); + } + case 1: { + __attribute__((address_space(7))) uint *ptr = + (__attribute__((address_space(7))) + uint *)__builtin_r600_implicitarg_ptr(); return ptr[2]; -} - -_CLC_DEF _CLC_OVERLOAD uint __spirv_GlobalOffset_z() { - __attribute__((address_space(7))) uint * ptr = - (__attribute__((address_space(7))) uint *) - __builtin_r600_implicitarg_ptr(); + } + case 2: { + __attribute__((address_space(7))) uint *ptr = + (__attribute__((address_space(7))) + uint *)__builtin_r600_implicitarg_ptr(); return ptr[3]; + } + default: + return 0; + } } diff --git a/libclc/libspirv/lib/r600/workitem/get_global_size.cl b/libclc/libspirv/lib/r600/workitem/get_global_size.cl index 0bb9d11379052..77fca97d38205 100644 --- a/libclc/libspirv/lib/r600/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/r600/workitem/get_global_size.cl @@ -12,14 +12,15 @@ uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x"); uint __clc_r600_get_global_size_y(void) __asm("llvm.r600.read.global.size.y"); uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z"); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_x() 
{ +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { + switch (dim) { + case 0: return __clc_r600_get_global_size_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_y() { + case 1: return __clc_r600_get_global_size_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalSize_z() { + case 2: return __clc_r600_get_global_size_z(); + default: + return 1; + } } diff --git a/libclc/libspirv/lib/r600/workitem/get_group_id.cl b/libclc/libspirv/lib/r600/workitem/get_group_id.cl index d7666d5b72b7e..ab806da493bdb 100644 --- a/libclc/libspirv/lib/r600/workitem/get_group_id.cl +++ b/libclc/libspirv/lib/r600/workitem/get_group_id.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_WorkgroupId_x() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInWorkgroupId(int dim) { + switch (dim) { + case 0: return __builtin_r600_read_tgid_x(); -} - -_CLC_DEF _CLC_OVERLOAD uint __spirv_WorkgroupId_y() { + case 1: return __builtin_r600_read_tgid_y(); -} - -_CLC_DEF _CLC_OVERLOAD uint __spirv_WorkgroupId_z() { + case 2: return __builtin_r600_read_tgid_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/r600/workitem/get_local_id.cl b/libclc/libspirv/lib/r600/workitem/get_local_id.cl index 1a486beacfc09..7393579609f7c 100644 --- a/libclc/libspirv/lib/r600/workitem/get_local_id.cl +++ b/libclc/libspirv/lib/r600/workitem/get_local_id.cl @@ -8,14 +8,15 @@ #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_LocalInvocationId_x() { +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInLocalInvocationId(int dim) { + switch (dim) { + case 0: return __builtin_r600_read_tidig_x(); -} - -_CLC_DEF _CLC_OVERLOAD uint __spirv_LocalInvocationId_y() { + case 1: return __builtin_r600_read_tidig_y(); -} - -_CLC_DEF _CLC_OVERLOAD uint __spirv_LocalInvocationId_z() { + case 2: return __builtin_r600_read_tidig_z(); + default: + return 0; + } } diff --git a/libclc/libspirv/lib/r600/workitem/get_local_size.cl b/libclc/libspirv/lib/r600/workitem/get_local_size.cl index 
e7bb1f7578632..c6443493c74ed 100644 --- a/libclc/libspirv/lib/r600/workitem/get_local_size.cl +++ b/libclc/libspirv/lib/r600/workitem/get_local_size.cl @@ -12,14 +12,15 @@ uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x"); uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y"); uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z"); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int dim) { + switch (dim) { + case 0: return __clc_r600_get_local_size_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_y() { + case 1: return __clc_r600_get_local_size_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_z() { + case 2: return __clc_r600_get_local_size_z(); + default: + return 1; + } } diff --git a/libclc/libspirv/lib/r600/workitem/get_num_groups.cl b/libclc/libspirv/lib/r600/workitem/get_num_groups.cl index 493e0c9eff5d8..fe4a80af23b98 100644 --- a/libclc/libspirv/lib/r600/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/r600/workitem/get_num_groups.cl @@ -12,14 +12,15 @@ uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x"); uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y"); uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z"); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_x() { +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { + switch (dim) { + case 0: return __clc_r600_get_num_groups_x(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_y() { + case 1: return __clc_r600_get_num_groups_y(); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_NumWorkgroups_z() { + case 2: return __clc_r600_get_num_groups_z(); + default: + return 0; + } } diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index 0d6eefe6387dc..136859aff1fa0 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ 
-47,24 +47,13 @@ int memcmp(const void *s1, const void *s2, size_t n) { DEVICE_EXTERN_C_INLINE int rand() { size_t gid = -#if defined(__NVPTX__) || defined(__AMDGCN__) - (__spirv_GlobalInvocationId_x() * __spirv_GlobalSize_y() * - __spirv_GlobalSize_z()) + - (__spirv_GlobalInvocationId_y() * __spirv_GlobalSize_z()) + - __spirv_GlobalInvocationId_z(); -#else - (__spirv_BuiltInGlobalInvocationId.x * __spirv_BuiltInGlobalSize.y * - __spirv_BuiltInGlobalSize.z) + - (__spirv_BuiltInGlobalInvocationId.y * __spirv_BuiltInGlobalSize.z) + - __spirv_BuiltInGlobalInvocationId.z; -#endif - size_t global_size = -#if defined(__NVPTX__) || defined(__AMDGCN__) - __spirv_GlobalSize_x() * __spirv_GlobalSize_y() * __spirv_GlobalSize_z(); -#else - __spirv_BuiltInGlobalSize.x * __spirv_BuiltInGlobalSize.y * - __spirv_BuiltInGlobalSize.z; -#endif + (__spirv_BuiltInGlobalInvocationId(0) * __spirv_BuiltInGlobalSize(1) * + __spirv_BuiltInGlobalSize(2)) + + (__spirv_BuiltInGlobalInvocationId(1) * __spirv_BuiltInGlobalSize(2)) + + __spirv_BuiltInGlobalInvocationId(2); + size_t global_size = __spirv_BuiltInGlobalSize(0) * + __spirv_BuiltInGlobalSize(1) * + __spirv_BuiltInGlobalSize(2); size_t gid1 = (global_size > RAND_NEXT_LEN) ? 
(gid & (RAND_NEXT_LEN - 1)) : gid; if (RAND_NEXT_ACC[gid1] == 0) @@ -80,24 +69,13 @@ int rand() { DEVICE_EXTERN_C_INLINE void srand(unsigned int seed) { size_t gid = -#if defined(__NVPTX__) || defined(__AMDGCN__) - (__spirv_GlobalInvocationId_x() * __spirv_GlobalSize_y() * - __spirv_GlobalSize_z()) + - (__spirv_GlobalInvocationId_y() * __spirv_GlobalSize_z()) + - __spirv_GlobalInvocationId_z(); -#else - (__spirv_BuiltInGlobalInvocationId.x * __spirv_BuiltInGlobalSize.y * - __spirv_BuiltInGlobalSize.z) + - (__spirv_BuiltInGlobalInvocationId.y * __spirv_BuiltInGlobalSize.z) + - __spirv_BuiltInGlobalInvocationId.z; -#endif - size_t global_size = -#if defined(__NVPTX__) || defined(__AMDGCN__) - __spirv_GlobalSize_x() * __spirv_GlobalSize_y() * __spirv_GlobalSize_z(); -#else - __spirv_BuiltInGlobalSize.x * __spirv_BuiltInGlobalSize.y * - __spirv_BuiltInGlobalSize.z; -#endif + (__spirv_BuiltInGlobalInvocationId(0) * __spirv_BuiltInGlobalSize(1) * + __spirv_BuiltInGlobalSize(2)) + + (__spirv_BuiltInGlobalInvocationId(1) * __spirv_BuiltInGlobalSize(2)) + + __spirv_BuiltInGlobalInvocationId(2); + size_t global_size = __spirv_BuiltInGlobalSize(0) * + __spirv_BuiltInGlobalSize(1) * + __spirv_BuiltInGlobalSize(2); size_t gid1 = (global_size > RAND_NEXT_LEN) ? 
(gid & (RAND_NEXT_LEN - 1)) : gid; RAND_NEXT_ACC[gid1] = seed; @@ -127,20 +105,20 @@ void _wassert(const wchar_t *wexpr, const wchar_t *wfile, unsigned line) { __truncate_wchar_char_str(wexpr, expr, sizeof(expr)); __devicelib_assert_fail( - expr, file, line, /*func=*/nullptr, __spirv_GlobalInvocationId_x(), - __spirv_GlobalInvocationId_y(), __spirv_GlobalInvocationId_z(), - __spirv_LocalInvocationId_x(), __spirv_LocalInvocationId_y(), - __spirv_LocalInvocationId_z()); + expr, file, line, /*func=*/nullptr, __spirv_BuiltInGlobalInvocationId(0), + __spirv_BuiltInGlobalInvocationId(1), + __spirv_BuiltInGlobalInvocationId(2), __spirv_BuiltInLocalInvocationId(0), + __spirv_BuiltInLocalInvocationId(1), __spirv_BuiltInLocalInvocationId(2)); } #else DEVICE_EXTERN_C void __assert_fail(const char *expr, const char *file, unsigned int line, const char *func) { __devicelib_assert_fail( - expr, file, line, func, __spirv_GlobalInvocationId_x(), - __spirv_GlobalInvocationId_y(), __spirv_GlobalInvocationId_z(), - __spirv_LocalInvocationId_x(), __spirv_LocalInvocationId_y(), - __spirv_LocalInvocationId_z()); + expr, file, line, func, __spirv_BuiltInGlobalInvocationId(0), + __spirv_BuiltInGlobalInvocationId(1), + __spirv_BuiltInGlobalInvocationId(2), __spirv_BuiltInLocalInvocationId(0), + __spirv_BuiltInLocalInvocationId(1), __spirv_BuiltInLocalInvocationId(2)); } // In GCC-15, std::__glibcxx_assert_fail is added to do runtime check for some @@ -151,10 +129,10 @@ DEVICE_EXTERN_CPP void __glibcxx_assert_fail(const char *file, int line, const char *func, const char *cond) noexcept { __devicelib_assert_fail( - cond, file, line, func, __spirv_GlobalInvocationId_x(), - __spirv_GlobalInvocationId_y(), __spirv_GlobalInvocationId_z(), - __spirv_LocalInvocationId_x(), __spirv_LocalInvocationId_y(), - __spirv_LocalInvocationId_z()); + cond, file, line, func, __spirv_BuiltInGlobalInvocationId(0), + __spirv_BuiltInGlobalInvocationId(1), + __spirv_BuiltInGlobalInvocationId(2), 
__spirv_BuiltInLocalInvocationId(0), + __spirv_BuiltInLocalInvocationId(1), __spirv_BuiltInLocalInvocationId(2)); } } // namespace std diff --git a/libdevice/include/sanitizer_utils.hpp b/libdevice/include/sanitizer_utils.hpp index 1b49fc58a6bc2..206d49763a4c0 100644 --- a/libdevice/include/sanitizer_utils.hpp +++ b/libdevice/include/sanitizer_utils.hpp @@ -13,22 +13,22 @@ #if defined(__SPIR__) || defined(__SPIRV__) inline size_t WorkGroupLinearId() { - return __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y * - __spirv_BuiltInNumWorkgroups.z + - __spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z + - __spirv_BuiltInWorkgroupId.z; + return __spirv_BuiltInWorkgroupId(0) * __spirv_BuiltInNumWorkgroups(1) * + __spirv_BuiltInNumWorkgroups(2) + + __spirv_BuiltInWorkgroupId(1) * __spirv_BuiltInNumWorkgroups(2) + + __spirv_BuiltInWorkgroupId(2); } static inline size_t LocalLinearId() { - return __spirv_BuiltInLocalInvocationId.x * __spirv_BuiltInWorkgroupSize.y * - __spirv_BuiltInWorkgroupSize.z + - __spirv_BuiltInLocalInvocationId.y * __spirv_BuiltInWorkgroupSize.z + - __spirv_BuiltInLocalInvocationId.z; + return __spirv_BuiltInLocalInvocationId(0) * __spirv_BuiltInWorkgroupSize(1) * + __spirv_BuiltInWorkgroupSize(2) + + __spirv_BuiltInLocalInvocationId(1) * __spirv_BuiltInWorkgroupSize(2) + + __spirv_BuiltInLocalInvocationId(2); } // For GPU device, each sub group is a hardware thread inline size_t SubGroupLinearId() { - return __spirv_BuiltInGlobalLinearId / __spirv_BuiltInSubgroupSize; + return __spirv_BuiltInGlobalLinearId() / __spirv_BuiltInSubgroupSize(); } inline void SubGroupBarrier() { diff --git a/libdevice/itt_compiler_wrappers.cpp b/libdevice/itt_compiler_wrappers.cpp index c9ac0700fcbd7..6109359d31980 100644 --- a/libdevice/itt_compiler_wrappers.cpp +++ b/libdevice/itt_compiler_wrappers.cpp @@ -14,13 +14,13 @@ SYCL_EXTERNAL EXTERN_C void __itt_offload_wi_start_wrapper() { if (!isITTEnabled()) return; - size_t GroupID[3] = 
{__spirv_BuiltInWorkgroupId.x, - __spirv_BuiltInWorkgroupId.y, - __spirv_BuiltInWorkgroupId.z}; - size_t WIID = __spirv_BuiltInGlobalLinearId; - uint32_t WGSize = static_cast(__spirv_BuiltInWorkgroupSize.x * - __spirv_BuiltInWorkgroupSize.y * - __spirv_BuiltInWorkgroupSize.z); + size_t GroupID[3] = {__spirv_BuiltInWorkgroupId(0), + __spirv_BuiltInWorkgroupId(1), + __spirv_BuiltInWorkgroupId(2)}; + size_t WIID = __spirv_BuiltInGlobalLinearId(); + uint32_t WGSize = static_cast(__spirv_BuiltInWorkgroupSize(0) * + __spirv_BuiltInWorkgroupSize(1) * + __spirv_BuiltInWorkgroupSize(2)); __itt_offload_wi_start_stub(GroupID, WIID, WGSize); } @@ -28,10 +28,10 @@ SYCL_EXTERNAL EXTERN_C void __itt_offload_wi_finish_wrapper() { if (!isITTEnabled()) return; - size_t GroupID[3] = {__spirv_BuiltInWorkgroupId.x, - __spirv_BuiltInWorkgroupId.y, - __spirv_BuiltInWorkgroupId.z}; - size_t WIID = __spirv_BuiltInGlobalLinearId; + size_t GroupID[3] = {__spirv_BuiltInWorkgroupId(0), + __spirv_BuiltInWorkgroupId(1), + __spirv_BuiltInWorkgroupId(2)}; + size_t WIID = __spirv_BuiltInGlobalLinearId(); __itt_offload_wi_finish_stub(GroupID, WIID); } @@ -46,10 +46,10 @@ SYCL_EXTERNAL EXTERN_C void __itt_offload_wi_resume_wrapper() { if (!isITTEnabled()) return; - size_t GroupID[3] = {__spirv_BuiltInWorkgroupId.x, - __spirv_BuiltInWorkgroupId.y, - __spirv_BuiltInWorkgroupId.z}; - size_t WIID = __spirv_BuiltInGlobalLinearId; + size_t GroupID[3] = {__spirv_BuiltInWorkgroupId(0), + __spirv_BuiltInWorkgroupId(1), + __spirv_BuiltInWorkgroupId(2)}; + size_t WIID = __spirv_BuiltInGlobalLinearId(); __itt_offload_wi_resume_stub(GroupID, WIID); } diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 819fd0910858b..00d4178d2921c 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -76,11 +76,11 @@ DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL); template <> \ __SYCL_CONVERGENT__ DEVICE_EXTERNAL Type \ __spirv_SubgroupBlockReadINTEL(const OCL_GLOBAL PType 
*Ptr) noexcept { \ - return Ptr[__spirv_SubgroupLocalInvocationId()]; \ + return Ptr[__spirv_BuiltInSubgroupLocalInvocationId()]; \ } \ __SYCL_CONVERGENT__ DEVICE_EXTERNAL void __spirv_SubgroupBlockWriteINTEL( \ PType OCL_GLOBAL *ptr, Type v) noexcept { \ - ((Type *)ptr)[__spirv_SubgroupLocalInvocationId()] = v; \ + ((Type *)ptr)[__spirv_BuiltInSubgroupLocalInvocationId()] = v; \ } \ static_assert(true) @@ -329,9 +329,9 @@ DefineShuffleVec2to16(float, f32, float); DEVICE_EXTERNAL GET_PROPS uint32_t bname() { return muxname(); } \ static_assert(true) // subgroup -GEN_u32(__spirv_SubgroupLocalInvocationId, __mux_get_sub_group_local_id); -GEN_u32(__spirv_SubgroupMaxSize, __mux_get_max_sub_group_size); -GEN_u32(__spirv_SubgroupId, __mux_get_sub_group_id); +GEN_u32(__spirv_BuiltInSubgroupLocalInvocationId, __mux_get_sub_group_local_id); +GEN_u32(__spirv_BuiltInSubgroupMaxSize, __mux_get_max_sub_group_size); +GEN_u32(__spirv_BuiltInSubgroupId, __mux_get_sub_group_id); // I64_I32 #define GEN_p(bname, muxname, arg) \ @@ -344,11 +344,11 @@ GEN_u32(__spirv_SubgroupId, __mux_get_sub_group_id); GEN_p(bname##_y, ncpu_name, 1); \ GEN_p(bname##_z, ncpu_name, 2) -GEN_xyz(__spirv_GlobalOffset, __mux_get_global_offset); -GEN_xyz(__spirv_LocalInvocationId, __mux_get_local_id); -GEN_xyz(__spirv_NumWorkgroups, __mux_get_num_groups); -GEN_xyz(__spirv_WorkgroupSize, __mux_get_local_size); -GEN_xyz(__spirv_WorkgroupId, __mux_get_group_id); +GEN_xyz(__spirv_BuiltInGlobalOffset, __mux_get_global_offset); +GEN_xyz(__spirv_BuiltInLocalInvocationId, __mux_get_local_id); +GEN_xyz(__spirv_BuiltInNumWorkgroups, __mux_get_num_groups); +GEN_xyz(__spirv_BuiltInWorkgroupSize, __mux_get_local_size); +GEN_xyz(__spirv_BuiltInWorkgroupId, __mux_get_group_id); template using MakeGlobalType = typename sycl::detail::DecoratedType< diff --git a/libdevice/sanitizer/asan_rtl.cpp b/libdevice/sanitizer/asan_rtl.cpp index db8368d3c10ef..8147a3cbe9628 100644 --- a/libdevice/sanitizer/asan_rtl.cpp +++ 
b/libdevice/sanitizer/asan_rtl.cpp @@ -405,12 +405,12 @@ void __asan_internal_report_save( SanitizerReport.Func[MaxFuncIdx] = '\0'; SanitizerReport.Line = line; - SanitizerReport.GID0 = __spirv_GlobalInvocationId_x(); - SanitizerReport.GID1 = __spirv_GlobalInvocationId_y(); - SanitizerReport.GID2 = __spirv_GlobalInvocationId_z(); - SanitizerReport.LID0 = __spirv_LocalInvocationId_x(); - SanitizerReport.LID1 = __spirv_LocalInvocationId_y(); - SanitizerReport.LID2 = __spirv_LocalInvocationId_z(); + SanitizerReport.GID0 = __spirv_BuiltInGlobalInvocationId(0); + SanitizerReport.GID1 = __spirv_BuiltInGlobalInvocationId(1); + SanitizerReport.GID2 = __spirv_BuiltInGlobalInvocationId(2); + SanitizerReport.LID0 = __spirv_BuiltInLocalInvocationId(0); + SanitizerReport.LID1 = __spirv_BuiltInLocalInvocationId(1); + SanitizerReport.LID2 = __spirv_BuiltInLocalInvocationId(2); SanitizerReport.Address = ptr; SanitizerReport.IsWrite = is_write; diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 487515a843d56..7663f46129451 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -119,12 +119,12 @@ void SaveReport(const uint32_t size, const char __SYCL_CONSTANT__ *file, SanitizerReport.AccessSize = size; SanitizerReport.Origin = origin; SanitizerReport.Line = line; - SanitizerReport.GID0 = __spirv_GlobalInvocationId_x(); - SanitizerReport.GID1 = __spirv_GlobalInvocationId_y(); - SanitizerReport.GID2 = __spirv_GlobalInvocationId_z(); - SanitizerReport.LID0 = __spirv_LocalInvocationId_x(); - SanitizerReport.LID1 = __spirv_LocalInvocationId_y(); - SanitizerReport.LID2 = __spirv_LocalInvocationId_z(); + SanitizerReport.GID0 = __spirv_BuiltInGlobalInvocationId(0); + SanitizerReport.GID1 = __spirv_BuiltInGlobalInvocationId(1); + SanitizerReport.GID2 = __spirv_BuiltInGlobalInvocationId(2); + SanitizerReport.LID0 = __spirv_BuiltInLocalInvocationId(0); + SanitizerReport.LID1 = __spirv_BuiltInLocalInvocationId(1); + 
SanitizerReport.LID2 = __spirv_BuiltInLocalInvocationId(2); // Show we've done copying atomicStore(&SanitizerReport.Flag, MSAN_REPORT_FINISH); @@ -379,8 +379,9 @@ inline void UnpoisonShadow(uptr addr, uint32_t as, size_t size) { // Check if the current work item is the first one in the work group inline bool IsFirstWorkItemWthinWorkGroup() { - return __spirv_LocalInvocationId_x() + __spirv_LocalInvocationId_y() + - __spirv_LocalInvocationId_z() == + return __spirv_BuiltInLocalInvocationId(0) + + __spirv_BuiltInLocalInvocationId(1) + + __spirv_BuiltInLocalInvocationId(2) == 0; } diff --git a/libdevice/sanitizer/tsan_rtl.cpp b/libdevice/sanitizer/tsan_rtl.cpp index b6a181597634b..60d71b724fc2f 100644 --- a/libdevice/sanitizer/tsan_rtl.cpp +++ b/libdevice/sanitizer/tsan_rtl.cpp @@ -149,8 +149,8 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) { inline int GetCurrentSid() { const size_t lid = LocalLinearId(); const size_t ThreadPerWorkGroup = - Min(4, __spirv_BuiltInWorkgroupSize.x * __spirv_BuiltInWorkgroupSize.y * - __spirv_BuiltInWorkgroupSize.z); + Min(4, __spirv_BuiltInWorkgroupSize(0) * __spirv_BuiltInWorkgroupSize(1) * + __spirv_BuiltInWorkgroupSize(2)); if (lid >= ThreadPerWorkGroup) return -1; @@ -235,12 +235,12 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type, SanitizerReport.Func[MaxFuncIdx] = '\0'; SanitizerReport.Line = line; - SanitizerReport.GID0 = __spirv_GlobalInvocationId_x(); - SanitizerReport.GID1 = __spirv_GlobalInvocationId_y(); - SanitizerReport.GID2 = __spirv_GlobalInvocationId_z(); - SanitizerReport.LID0 = __spirv_LocalInvocationId_x(); - SanitizerReport.LID1 = __spirv_LocalInvocationId_y(); - SanitizerReport.LID2 = __spirv_LocalInvocationId_z(); + SanitizerReport.GID0 = __spirv_BuiltInGlobalInvocationId(0); + SanitizerReport.GID1 = __spirv_BuiltInGlobalInvocationId(1); + SanitizerReport.GID2 = __spirv_BuiltInGlobalInvocationId(2); + SanitizerReport.LID0 = __spirv_BuiltInLocalInvocationId(0); + 
SanitizerReport.LID1 = __spirv_BuiltInLocalInvocationId(1); + SanitizerReport.LID2 = __spirv_BuiltInLocalInvocationId(2); atomicStore(&TsanLaunchInfo->Lock, 0); break; @@ -480,8 +480,9 @@ static __SYCL_CONSTANT__ const char __tsan_print_cleanup_local[] = DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_static_local(uptr addr, size_t size) { // Update shadow memory of local memory only on first work-item - if (__spirv_LocalInvocationId_x() + __spirv_LocalInvocationId_y() + - __spirv_LocalInvocationId_z() == + if (__spirv_BuiltInLocalInvocationId(0) + + __spirv_BuiltInLocalInvocationId(1) + + __spirv_BuiltInLocalInvocationId(2) == 0) { if (TsanLaunchInfo->LocalShadowOffset == 0) return; diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index fbeb82e4901e5..fad12e685c278 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -11,69 +11,25 @@ #include "device.h" #include +#include // for uint32_t -#if defined(__NVPTX__) || defined(__AMDGCN__) -// For AMD/Cuda those symbols will be provided by libclc. 
-DEVICE_EXTERNAL size_t __spirv_GlobalInvocationId_x(); -DEVICE_EXTERNAL size_t __spirv_GlobalInvocationId_y(); -DEVICE_EXTERNAL size_t __spirv_GlobalInvocationId_z(); -DEVICE_EXTERNAL size_t __spirv_LocalInvocationId_x(); -DEVICE_EXTERNAL size_t __spirv_LocalInvocationId_y(); -DEVICE_EXTERNAL size_t __spirv_LocalInvocationId_z(); -DEVICE_EXTERNAL size_t __spirv_GlobalSize_x(); -DEVICE_EXTERNAL size_t __spirv_GlobalSize_y(); -DEVICE_EXTERNAL size_t __spirv_GlobalSize_z(); -#endif // __NVPTX__ || __AMDGCN__ +#ifdef __SYCL_DEVICE_ONLY__ -#if defined(__SPIR__) || defined(__SPIRV__) +DEVICE_EXTERNAL size_t __spirv_BuiltInGlobalInvocationId(int); +DEVICE_EXTERNAL size_t __spirv_BuiltInGlobalSize(int); +DEVICE_EXTERNAL size_t __spirv_BuiltInGlobalOffset(int); +DEVICE_EXTERNAL size_t __spirv_BuiltInNumWorkgroups(int); +DEVICE_EXTERNAL size_t __spirv_BuiltInWorkgroupSize(int); +DEVICE_EXTERNAL size_t __spirv_BuiltInWorkgroupId(int); +DEVICE_EXTERNAL size_t __spirv_BuiltInLocalInvocationId(int); +DEVICE_EXTERNAL size_t __spirv_BuiltInGlobalLinearId(); -#include +DEVICE_EXTERNAL uint32_t __spirv_BuiltInSubgroupSize(); +DEVICE_EXTERNAL uint32_t __spirv_BuiltInSubgroupMaxSize(); +DEVICE_EXTERNAL uint32_t __spirv_BuiltInNumSubgroups(); +DEVICE_EXTERNAL uint32_t __spirv_BuiltInSubgroupId(); +DEVICE_EXTERNAL uint32_t __spirv_BuiltInSubgroupLocalInvocationId(); -#define __SPIRV_VAR_QUALIFIERS EXTERN_C const -typedef size_t size_t_vec __attribute__((ext_vector_type(3))); -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; -__SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalSize; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInNumWorkgroups; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; +#endif // __SYCL_DEVICE_ONLY__ -__SPIRV_VAR_QUALIFIERS uint32_t 
__spirv_BuiltInSubgroupSize; - -__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupLocalInvocationId; - -// FIXME: change DEVICE_EXTERNAL to static and rename the functions, -// when #3311 is fixed. -// These are just internal functions used within libdevice. -// We must not intrude the __spirv "namespace", so we'd better -// use names like getGlobalInvocationIdX. -// Libdevice must not export these APIs either, but it currently -// exports them due to DEVICE_EXTERNAL. -DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_x() { - return __spirv_BuiltInGlobalInvocationId.x; -} -DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_y() { - return __spirv_BuiltInGlobalInvocationId.y; -} -DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_z() { - return __spirv_BuiltInGlobalInvocationId.z; -} - -DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_x() { - return __spirv_BuiltInLocalInvocationId.x; -} -DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_y() { - return __spirv_BuiltInLocalInvocationId.y; -} -DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_z() { - return __spirv_BuiltInLocalInvocationId.z; -} - -#if !defined(__SPIR__) && !defined(__SPIRV__) -const size_t_vec __spirv_BuiltInGlobalInvocationId{}; -const size_t_vec __spirv_BuiltInLocalInvocationId{}; -#endif // !__SPIR__ && !__SPIRV__ - -#endif // __SPIR__ || __SPIRV__ #endif // __LIBDEVICE_SPIRV_VARS_H diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index 8acc09db641c0..b5ba1f004d638 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -131,7 +131,12 @@ enum class lsc_subopcode : uint8_t { fence = 0x1f, }; static constexpr char ESIMD_INSERTED_VSTORE_FUNC_NAME[] = "_Z14__esimd_vstorev"; -static constexpr char SPIRV_INTRIN_PREF[] = "__spirv_BuiltIn"; +static constexpr char SPIRV_BuiltInSubgroupLocalInvocationId[] = + "_Z40__spirv_BuiltInSubgroupLocalInvocationIdv"; +static 
constexpr char SPIRV_BuiltInSubgroupSizev[] = + "_Z27__spirv_BuiltInSubgroupSizev"; +static constexpr char SPIRV_BuiltInSubgroupMaxSizev[] = + "_Z30__spirv_BuiltInSubgroupMaxSizev"; struct ESIMDIntrinDesc { // Denotes argument translation rule kind. enum GenXArgRuleKind { @@ -1280,37 +1285,23 @@ bool translateLLVMIntrinsic(CallInst *CI) { return true; // "intrinsic has been translated, erase the original call" } -/// Replaces the load \p LI of SPIRV global with a compile time known constant +/// Replaces SPIRV workitem built-in call with a compile time known constant /// when possible. The replaced instructions are stored into the given /// container \p InstsToErase. static void -translateSpirvGlobalUses(LoadInst *LI, StringRef SpirvGlobalName, - SmallVectorImpl &InstsToErase) { +translateWorkItemBuiltInUse(CallInst *CI, StringRef SpirvBuiltInName, + SmallVectorImpl &InstsToErase) { Value *NewInst = nullptr; - if (SpirvGlobalName == "SubgroupLocalInvocationId") { - NewInst = llvm::Constant::getNullValue(LI->getType()); - } else if (SpirvGlobalName == "SubgroupSize" || - SpirvGlobalName == "SubgroupMaxSize") { - NewInst = llvm::Constant::getIntegerValue(LI->getType(), + if (SpirvBuiltInName == SPIRV_BuiltInSubgroupLocalInvocationId) { + NewInst = llvm::Constant::getNullValue(CI->getType()); + } else if (SpirvBuiltInName == SPIRV_BuiltInSubgroupSizev || + SpirvBuiltInName == SPIRV_BuiltInSubgroupMaxSizev) { + NewInst = llvm::Constant::getIntegerValue(CI->getType(), llvm::APInt(32, 1, true)); } if (NewInst) { - LI->replaceAllUsesWith(NewInst); - InstsToErase.push_back(LI); - } -} - -static void translateGlobalUse(Value *Use, StringRef SpirvGlobalName, - SmallVectorImpl &InstsToErase) { - LoadInst *LI = dyn_cast(Use); - ConstantExpr *CE = dyn_cast(Use); - GetElementPtrConstantExpr *GEPCE = dyn_cast(Use); - if (LI != nullptr) { - translateSpirvGlobalUses(LI, SpirvGlobalName, InstsToErase); - } else if (CE != nullptr || GEPCE != nullptr) { - for (User *U : (CE == nullptr 
? GEPCE : CE)->users()) { - translateGlobalUse(U, SpirvGlobalName, InstsToErase); - } + CI->replaceAllUsesWith(NewInst); + InstsToErase.push_back(CI); } } @@ -2117,13 +2108,14 @@ PreservedAnalyses SYCLLowerESIMDPass::run(Module &M, } SmallVector ToErase; - constexpr size_t PrefLen = StringRef(SPIRV_INTRIN_PREF).size(); - for (GlobalVariable &Global : M.globals()) { - if (!Global.getName().starts_with(SPIRV_INTRIN_PREF)) + for (Function &F : M) { + if (F.getName() != SPIRV_BuiltInSubgroupLocalInvocationId && + F.getName() != SPIRV_BuiltInSubgroupSizev && + F.getName() != SPIRV_BuiltInSubgroupMaxSizev) continue; - for (User *U : Global.users()) - translateGlobalUse(U, Global.getName().drop_front(PrefLen), ToErase); + for (User *U : F.users()) + translateWorkItemBuiltInUse(cast(U), F.getName(), ToErase); } for (auto *CI : ToErase) CI->eraseFromParent(); diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index 08dcfe4b84ae0..b6be652a7b20a 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -940,23 +940,22 @@ Value *spirv::genPseudoLocalID(Instruction &Before, const Triple &TT) { IRBuilder<> Bld(Ctx); Bld.SetInsertPoint(&Before); - auto CreateCallee = [&](StringRef Name) { - FunctionCallee Callee = M.getOrInsertFunction(Name, RetTy); + auto CreateCallee = [&](StringRef Name, int Dim) { + auto *ArgTy = Type::getInt32Ty(Ctx); + FunctionCallee Callee = M.getOrInsertFunction(Name, RetTy, ArgTy); assert(Callee.getCallee() && "spirv intrinsic creation failed"); - return Bld.CreateCall(Callee, {}); + return Bld.CreateCall(Callee, {ConstantInt::get(ArgTy, Dim)}); }; - Value *LocalInvocationIdX = - CreateCallee("_Z27__spirv_LocalInvocationId_xv"); - Value *LocalInvocationIdY = - CreateCallee("_Z27__spirv_LocalInvocationId_yv"); - Value *LocalInvocationIdZ = - CreateCallee("_Z27__spirv_LocalInvocationId_zv"); + StringRef LocalInvocationIdName = "_Z33__spirv_BuiltInLocalInvocationIdi"; + Value 
*LocalInvocationIdX = CreateCallee(LocalInvocationIdName, 0); + Value *LocalInvocationIdY = CreateCallee(LocalInvocationIdName, 1); + Value *LocalInvocationIdZ = CreateCallee(LocalInvocationIdName, 2); // 1: returns - // __spirv_LocalInvocationId_x() | - // __spirv_LocalInvocationId_y() | - // __spirv_LocalInvocationId_z() + // __spirv_BuiltInLocalInvocationId() | + // __spirv_BuiltInLocalInvocationId() | + // __spirv_BuiltInLocalInvocationId() // return Bld.CreateOr(LocalInvocationIdX, Bld.CreateOr(LocalInvocationIdY, LocalInvocationIdZ)); diff --git a/llvm/test/SYCLLowerIR/convergent.ll b/llvm/test/SYCLLowerIR/convergent.ll index 56f6efce5ca80..81a4cf248cf87 100644 --- a/llvm/test/SYCLLowerIR/convergent.ll +++ b/llvm/test/SYCLLowerIR/convergent.ll @@ -9,18 +9,14 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" %struct.baz = type { i64 } define internal spir_func void @wibble(ptr byval(%struct.baz) %arg1) !work_group_scope !0 { -; CHECK-PTX: call i64 @_Z27__spirv_LocalInvocationId_xv() -; CHECK-PTX: call i64 @_Z27__spirv_LocalInvocationId_yv() -; CHECK-PTX: call i64 @_Z27__spirv_LocalInvocationId_zv() +; CHECK-PTX: call i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32 0) +; CHECK-PTX: call i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32 1) +; CHECK-PTX: call i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32 2) ; CHECK: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) ret void } -; CHECK-PTX: declare i64 @_Z27__spirv_LocalInvocationId_xv() - -; CHECK-PTX: declare i64 @_Z27__spirv_LocalInvocationId_yv() - -; CHECK-PTX: declare i64 @_Z27__spirv_LocalInvocationId_zv() +; CHECK-PTX: declare i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32) ; CHECK: ; Function Attrs: convergent ; CHECK: declare void @_Z22__spirv_ControlBarrieriii(i32, i32, i32) #[[ATTR_NUM:[0-9]+]] diff --git a/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll b/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll index 269276c7c0ff4..70519b1e3f8b4 100644 --- 
a/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll +++ b/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll @@ -6,14 +6,10 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -@__spirv_BuiltInSubgroupLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant i32, align 4 -@__spirv_BuiltInSubgroupSize = external dso_local local_unnamed_addr addrspace(1) constant i32, align 4 -@__spirv_BuiltInSubgroupMaxSize = external dso_local local_unnamed_addr addrspace(1) constant i32, align 4 - ; Function Attrs: convergent norecurse define dso_local spir_kernel void @kernel_SubgroupLocalInvocationId(ptr addrspace(1) noundef align 8 %_arg_DoNotOptimize, ptr addrspace(1) noundef align 4 %_arg_DoNotOptimize32) #0 !sycl_explicit_simd !3 { entry: - %0 = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 + %0 = call spir_func i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() %conv.i = zext i32 %0 to i64 store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8 %add.i = add i32 %0, 3 @@ -28,7 +24,7 @@ entry: ; Function Attrs: convergent norecurse define dso_local spir_kernel void @kernel_SubgroupSize(ptr addrspace(1) noundef align 8 %_arg_DoNotOptimize, ptr addrspace(1) noundef align 4 %_arg_DoNotOptimize32)#0 !sycl_explicit_simd !3{ entry: - %0 = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4 + %0 = call spir_func i32 @_Z27__spirv_BuiltInSubgroupSizev() %conv.i = zext i32 %0 to i64 store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8 %add.i = add i32 %0, 7 @@ -43,7 +39,7 @@ entry: ; Function Attrs: convergent norecurse define dso_local spir_kernel void @kernel_SubgroupMaxSize(ptr addrspace(1) noundef align 8 %_arg_DoNotOptimize, ptr addrspace(1) noundef align 4 %_arg_DoNotOptimize32) #0 !sycl_explicit_simd !3 { entry: - %0 = load i32, ptr addrspace(1) 
@__spirv_BuiltInSubgroupMaxSize, align 4 + %0 = call spir_func i32 @_Z30__spirv_BuiltInSubgroupMaxSizev() %conv.i = zext i32 %0 to i64 store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8 %add.i = add i32 %0, 9 @@ -55,6 +51,10 @@ entry: ; CHECK: %add.i = add i32 1, 9 ; CHECK: store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4 +declare spir_func i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() +declare spir_func i32 @_Z27__spirv_BuiltInSubgroupSizev() +declare spir_func i32 @_Z30__spirv_BuiltInSubgroupMaxSizev() + attributes #0 = { "sycl-module-id"="a.cpp" } !llvm.module.flags = !{!0} diff --git a/sycl/include/sycl/__spirv/spirv_vars.hpp b/sycl/include/sycl/__spirv/spirv_vars.hpp index 285f5e37e37e0..aa2c5d27a275d 100644 --- a/sycl/include/sycl/__spirv/spirv_vars.hpp +++ b/sycl/include/sycl/__spirv/spirv_vars.hpp @@ -10,162 +10,24 @@ #ifdef __SYCL_DEVICE_ONLY__ -#include // for __ocl_vec_t #include // for __DPCPP_SYCL_EXTERNAL #include // for size_t -#include // for uint8_t - -#define __SPIRV_VAR_QUALIFIERS extern "C" const - -#if !(defined(__SPIR__) || defined(__SPIRV__)) - -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_x(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_y(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_z(); - -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalSize_x(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalSize_y(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalSize_z(); - -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalOffset_x(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalOffset_y(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_GlobalOffset_z(); - -__DPCPP_SYCL_EXTERNAL size_t __spirv_NumWorkgroups_x(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_NumWorkgroups_y(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_NumWorkgroups_z(); - -__DPCPP_SYCL_EXTERNAL size_t __spirv_WorkgroupSize_x(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_WorkgroupSize_y(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_WorkgroupSize_z(); - 
-__DPCPP_SYCL_EXTERNAL size_t __spirv_WorkgroupId_x(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_WorkgroupId_y(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_WorkgroupId_z(); - -__DPCPP_SYCL_EXTERNAL size_t __spirv_LocalInvocationId_x(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_LocalInvocationId_y(); -__DPCPP_SYCL_EXTERNAL size_t __spirv_LocalInvocationId_z(); - -__DPCPP_SYCL_EXTERNAL uint32_t __spirv_SubgroupSize(); -__DPCPP_SYCL_EXTERNAL uint32_t __spirv_SubgroupMaxSize(); -__DPCPP_SYCL_EXTERNAL uint32_t __spirv_NumSubgroups(); -__DPCPP_SYCL_EXTERNAL uint32_t __spirv_SubgroupId(); -__DPCPP_SYCL_EXTERNAL uint32_t __spirv_SubgroupLocalInvocationId(); - -#else // !(defined(__SPIR__) || defined(__SPIRV__)) - -typedef size_t size_t_vec __attribute__((ext_vector_type(3))); -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalSize; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInNumWorkgroups; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalOffset; - -__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupSize; -__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupMaxSize; -__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInNumSubgroups; -__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupId; -__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupLocalInvocationId; - -__SPIRV_VAR_QUALIFIERS __ocl_vec_t __spirv_BuiltInSubgroupEqMask; -__SPIRV_VAR_QUALIFIERS __ocl_vec_t __spirv_BuiltInSubgroupGeMask; -__SPIRV_VAR_QUALIFIERS __ocl_vec_t __spirv_BuiltInSubgroupGtMask; -__SPIRV_VAR_QUALIFIERS __ocl_vec_t __spirv_BuiltInSubgroupLeMask; -__SPIRV_VAR_QUALIFIERS __ocl_vec_t __spirv_BuiltInSubgroupLtMask; - -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalInvocationId_x() { - return 
__spirv_BuiltInGlobalInvocationId.x; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalInvocationId_y() { - return __spirv_BuiltInGlobalInvocationId.y; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalInvocationId_z() { - return __spirv_BuiltInGlobalInvocationId.z; -} - -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalSize_x() { - return __spirv_BuiltInGlobalSize.x; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalSize_y() { - return __spirv_BuiltInGlobalSize.y; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalSize_z() { - return __spirv_BuiltInGlobalSize.z; -} - -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalOffset_x() { - return __spirv_BuiltInGlobalOffset.x; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalOffset_y() { - return __spirv_BuiltInGlobalOffset.y; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalOffset_z() { - return __spirv_BuiltInGlobalOffset.z; -} - -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_NumWorkgroups_x() { - return __spirv_BuiltInNumWorkgroups.x; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_NumWorkgroups_y() { - return __spirv_BuiltInNumWorkgroups.y; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_NumWorkgroups_z() { - return __spirv_BuiltInNumWorkgroups.z; -} - -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_WorkgroupSize_x() { - return __spirv_BuiltInWorkgroupSize.x; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_WorkgroupSize_y() { - return __spirv_BuiltInWorkgroupSize.y; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_WorkgroupSize_z() { - return __spirv_BuiltInWorkgroupSize.z; -} - -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_WorkgroupId_x() { - return __spirv_BuiltInWorkgroupId.x; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_WorkgroupId_y() { - return __spirv_BuiltInWorkgroupId.y; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_WorkgroupId_z() { - return __spirv_BuiltInWorkgroupId.z; -} - -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_LocalInvocationId_x() { - return 
__spirv_BuiltInLocalInvocationId.x; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_LocalInvocationId_y() { - return __spirv_BuiltInLocalInvocationId.y; -} -__DPCPP_SYCL_EXTERNAL inline size_t __spirv_LocalInvocationId_z() { - return __spirv_BuiltInLocalInvocationId.z; -} - -__DPCPP_SYCL_EXTERNAL inline uint32_t __spirv_SubgroupSize() { - return __spirv_BuiltInSubgroupSize; -} -__DPCPP_SYCL_EXTERNAL inline uint32_t __spirv_SubgroupMaxSize() { - return __spirv_BuiltInSubgroupMaxSize; -} -__DPCPP_SYCL_EXTERNAL inline uint32_t __spirv_NumSubgroups() { - return __spirv_BuiltInNumSubgroups; -} -__DPCPP_SYCL_EXTERNAL inline uint32_t __spirv_SubgroupId() { - return __spirv_BuiltInSubgroupId; -} -__DPCPP_SYCL_EXTERNAL inline uint32_t __spirv_SubgroupLocalInvocationId() { - return __spirv_BuiltInSubgroupLocalInvocationId; -} - -#endif // !(defined(__SPIR__) || defined(__SPIRV__)) - -#undef __SPIRV_VAR_QUALIFIERS +#include // for uint32_t + +__DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInGlobalInvocationId(int); +__DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInGlobalSize(int); +__DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInGlobalOffset(int); +__DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInNumWorkgroups(int); +__DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInWorkgroupSize(int); +__DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInWorkgroupId(int); +__DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInLocalInvocationId(int); + +__DPCPP_SYCL_EXTERNAL uint32_t __spirv_BuiltInSubgroupSize(); +__DPCPP_SYCL_EXTERNAL uint32_t __spirv_BuiltInSubgroupMaxSize(); +__DPCPP_SYCL_EXTERNAL uint32_t __spirv_BuiltInNumSubgroups(); +__DPCPP_SYCL_EXTERNAL uint32_t __spirv_BuiltInSubgroupId(); +__DPCPP_SYCL_EXTERNAL uint32_t __spirv_BuiltInSubgroupLocalInvocationId(); namespace __spirv { @@ -173,9 +35,9 @@ namespace __spirv { // built-in variables #define __SPIRV_DEFINE_INIT_AND_GET_HELPERS(POSTFIX) \ template size_t get##POSTFIX(); \ - template <> size_t get##POSTFIX<0>() { return __spirv_##POSTFIX##_x(); } \ - template <> 
size_t get##POSTFIX<1>() { return __spirv_##POSTFIX##_y(); } \ - template <> size_t get##POSTFIX<2>() { return __spirv_##POSTFIX##_z(); } \ + template <> size_t get##POSTFIX<0>() { return __spirv_##POSTFIX(0); } \ + template <> size_t get##POSTFIX<1>() { return __spirv_##POSTFIX(1); } \ + template <> size_t get##POSTFIX<2>() { return __spirv_##POSTFIX(2); } \ \ template struct InitSizesST##POSTFIX; \ \ @@ -197,13 +59,13 @@ namespace __spirv { return InitSizesST##POSTFIX::initSize(); \ } -__SPIRV_DEFINE_INIT_AND_GET_HELPERS(GlobalSize); -__SPIRV_DEFINE_INIT_AND_GET_HELPERS(GlobalInvocationId) -__SPIRV_DEFINE_INIT_AND_GET_HELPERS(WorkgroupSize) -__SPIRV_DEFINE_INIT_AND_GET_HELPERS(NumWorkgroups) -__SPIRV_DEFINE_INIT_AND_GET_HELPERS(LocalInvocationId) -__SPIRV_DEFINE_INIT_AND_GET_HELPERS(WorkgroupId) -__SPIRV_DEFINE_INIT_AND_GET_HELPERS(GlobalOffset) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalSize); +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalInvocationId) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInWorkgroupSize) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInNumWorkgroups) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInLocalInvocationId) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInWorkgroupId) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalOffset) #undef __SPIRV_DEFINE_INIT_AND_GET_HELPERS diff --git a/sycl/include/sycl/detail/helpers.hpp b/sycl/include/sycl/detail/helpers.hpp index 41a15b783da0b..7bf1cd7b41a95 100644 --- a/sycl/include/sycl/detail/helpers.hpp +++ b/sycl/include/sycl/detail/helpers.hpp @@ -128,44 +128,48 @@ class Builder { template static const id getElement(id *) { static_assert(is_valid_dimensions, "invalid dimensions"); - return __spirv::initGlobalInvocationId>(); + return __spirv::initBuiltInGlobalInvocationId>(); } template static const group getElement(group *) { static_assert(is_valid_dimensions, "invalid dimensions"); - range GlobalSize{__spirv::initGlobalSize>()}; - range LocalSize{__spirv::initWorkgroupSize>()}; - range 
GroupRange{__spirv::initNumWorkgroups>()}; - id GroupId{__spirv::initWorkgroupId>()}; + range GlobalSize{__spirv::initBuiltInGlobalSize>()}; + range LocalSize{ + __spirv::initBuiltInWorkgroupSize>()}; + range GroupRange{ + __spirv::initBuiltInNumWorkgroups>()}; + id GroupId{__spirv::initBuiltInWorkgroupId>()}; return createGroup(GlobalSize, LocalSize, GroupRange, GroupId); } template static std::enable_if_t> getItem() { static_assert(is_valid_dimensions, "invalid dimensions"); - id GlobalId{__spirv::initGlobalInvocationId>()}; - range GlobalSize{__spirv::initGlobalSize>()}; - id GlobalOffset{__spirv::initGlobalOffset>()}; + id GlobalId{__spirv::initBuiltInGlobalInvocationId>()}; + range GlobalSize{__spirv::initBuiltInGlobalSize>()}; + id GlobalOffset{__spirv::initBuiltInGlobalOffset>()}; return createItem(GlobalSize, GlobalId, GlobalOffset); } template static std::enable_if_t> getItem() { static_assert(is_valid_dimensions, "invalid dimensions"); - id GlobalId{__spirv::initGlobalInvocationId>()}; - range GlobalSize{__spirv::initGlobalSize>()}; + id GlobalId{__spirv::initBuiltInGlobalInvocationId>()}; + range GlobalSize{__spirv::initBuiltInGlobalSize>()}; return createItem(GlobalSize, GlobalId); } template static const nd_item getElement(nd_item *) { static_assert(is_valid_dimensions, "invalid dimensions"); - range GlobalSize{__spirv::initGlobalSize>()}; - range LocalSize{__spirv::initWorkgroupSize>()}; - range GroupRange{__spirv::initNumWorkgroups>()}; - id GroupId{__spirv::initWorkgroupId>()}; - id GlobalId{__spirv::initGlobalInvocationId>()}; - id LocalId{__spirv::initLocalInvocationId>()}; - id GlobalOffset{__spirv::initGlobalOffset>()}; + range GlobalSize{__spirv::initBuiltInGlobalSize>()}; + range LocalSize{ + __spirv::initBuiltInWorkgroupSize>()}; + range GroupRange{ + __spirv::initBuiltInNumWorkgroups>()}; + id GroupId{__spirv::initBuiltInWorkgroupId>()}; + id GlobalId{__spirv::initBuiltInGlobalInvocationId>()}; + id 
LocalId{__spirv::initBuiltInLocalInvocationId>()}; + id GlobalOffset{__spirv::initBuiltInGlobalOffset>()}; group Group = createGroup(GlobalSize, LocalSize, GroupRange, GroupId); item GlobalItem = diff --git a/sycl/include/sycl/ext/oneapi/experimental/ballot_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/ballot_group.hpp index 7a6f51cacc39e..0186d0840992c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/ballot_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/ballot_group.hpp @@ -119,7 +119,7 @@ template class ballot_group { bool leader() const { #ifdef __SYCL_DEVICE_ONLY__ uint32_t Lowest = static_cast(Mask.find_low()[0]); - return __spirv_SubgroupLocalInvocationId() == Lowest; + return __spirv_BuiltInSubgroupLocalInvocationId() == Lowest; #else throw exception(make_error_code(errc::runtime), "Non-uniform groups are not supported on host."); diff --git a/sycl/include/sycl/ext/oneapi/experimental/fixed_size_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/fixed_size_group.hpp index 56f30f2091a6b..748b55dd45193 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/fixed_size_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/fixed_size_group.hpp @@ -47,7 +47,7 @@ template class fixed_size_group { id_type get_group_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_SubgroupLocalInvocationId() / PartitionSize; + return __spirv_BuiltInSubgroupLocalInvocationId() / PartitionSize; #else throw exception(make_error_code(errc::runtime), "Non-uniform groups are not supported on host."); @@ -56,7 +56,7 @@ template class fixed_size_group { id_type get_local_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_SubgroupLocalInvocationId() % PartitionSize; + return __spirv_BuiltInSubgroupLocalInvocationId() % PartitionSize; #else throw exception(make_error_code(errc::runtime), "Non-uniform groups are not supported on host."); @@ -65,7 +65,7 @@ template class fixed_size_group { range_type get_group_range() const { #ifdef 
__SYCL_DEVICE_ONLY__ - return __spirv_SubgroupSize() / PartitionSize; + return __spirv_BuiltInSubgroupSize() / PartitionSize; #else throw exception(make_error_code(errc::runtime), "Non-uniform groups are not supported on host."); diff --git a/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp index cbe8de03a9963..c6c28b37c93a7 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp @@ -125,7 +125,7 @@ class opportunistic_group { bool leader() const { #ifdef __SYCL_DEVICE_ONLY__ uint32_t Lowest = static_cast(Mask.find_low()[0]); - return __spirv_SubgroupLocalInvocationId() == Lowest; + return __spirv_BuiltInSubgroupLocalInvocationId() == Lowest; #else throw exception(make_error_code(errc::runtime), "Non-uniform groups are not supported on host."); diff --git a/sycl/include/sycl/ext/oneapi/experimental/tangle_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/tangle_group.hpp index 59af6bdfc753b..5ef93b4bc81d3 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/tangle_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/tangle_group.hpp @@ -118,7 +118,7 @@ template class tangle_group { bool leader() const { #ifdef __SYCL_DEVICE_ONLY__ uint32_t Lowest = static_cast(Mask.find_low()[0]); - return __spirv_SubgroupLocalInvocationId() == Lowest; + return __spirv_BuiltInSubgroupLocalInvocationId() == Lowest; #else throw exception(make_error_code(errc::runtime), "Non-uniform groups are not supported on host."); diff --git a/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp b/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp index 63e56327168e8..810b74e07d190 100644 --- a/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp +++ b/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp @@ -312,7 +312,7 @@ struct sub_group_mask { private: static size_t GetMaxLocalRangeSize() { #ifdef __SYCL_DEVICE_ONLY__ - 
return __spirv_SubgroupMaxSize(); + return __spirv_BuiltInSubgroupMaxSize(); #else return max_bits; #endif diff --git a/sycl/include/sycl/group.hpp b/sycl/include/sycl/group.hpp index 9402d2fb83d0e..8315537d16253 100644 --- a/sycl/include/sycl/group.hpp +++ b/sycl/include/sycl/group.hpp @@ -133,7 +133,7 @@ template class __SYCL_TYPE(group) group { id get_local_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initLocalInvocationId>(); + return __spirv::initBuiltInLocalInvocationId>(); #else throw sycl::exception(make_error_code(errc::feature_not_supported), "get_local_id() is not implemented on host"); @@ -182,13 +182,13 @@ template class __SYCL_TYPE(group) group { detail::workGroupBarrier(); #ifdef __SYCL_DEVICE_ONLY__ range GlobalSize{ - __spirv::initGlobalSize>()}; + __spirv::initBuiltInGlobalSize>()}; range LocalSize{ - __spirv::initWorkgroupSize>()}; + __spirv::initBuiltInWorkgroupSize>()}; id GlobalId{ - __spirv::initGlobalInvocationId>()}; + __spirv::initBuiltInGlobalInvocationId>()}; id LocalId{ - __spirv::initLocalInvocationId>()}; + __spirv::initBuiltInLocalInvocationId>()}; // no 'iterate' in the device code variant, because // (1) this code is already invoked by each work item as a part of the @@ -232,13 +232,13 @@ template class __SYCL_TYPE(group) group { detail::workGroupBarrier(); #ifdef __SYCL_DEVICE_ONLY__ range GlobalSize{ - __spirv::initGlobalSize>()}; + __spirv::initBuiltInGlobalSize>()}; range LocalSize{ - __spirv::initWorkgroupSize>()}; + __spirv::initBuiltInWorkgroupSize>()}; id GlobalId{ - __spirv::initGlobalInvocationId>()}; + __spirv::initBuiltInGlobalInvocationId>()}; id LocalId{ - __spirv::initLocalInvocationId>()}; + __spirv::initBuiltInLocalInvocationId>()}; item GlobalItem = detail::Builder::createItem(GlobalSize, GlobalId); diff --git a/sycl/include/sycl/nd_item.hpp b/sycl/include/sycl/nd_item.hpp index 76ec89f2bba70..bf7ad5ff76b4f 100644 --- a/sycl/include/sycl/nd_item.hpp +++ b/sycl/include/sycl/nd_item.hpp @@ -50,7 +50,7 
@@ template class nd_item { id get_global_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initGlobalInvocationId>(); + return __spirv::initBuiltInGlobalInvocationId>(); #else return {}; #endif @@ -81,7 +81,7 @@ template class nd_item { id get_local_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initLocalInvocationId>(); + return __spirv::initBuiltInLocalInvocationId>(); #else return {}; #endif @@ -143,7 +143,7 @@ template class nd_item { range get_group_range() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initNumWorkgroups>(); + return __spirv::initBuiltInNumWorkgroups>(); #else return {}; #endif @@ -157,7 +157,7 @@ template class nd_item { range get_global_range() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initGlobalSize>(); + return __spirv::initBuiltInGlobalSize>(); #else return {}; #endif @@ -171,7 +171,7 @@ template class nd_item { range get_local_range() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initWorkgroupSize>(); + return __spirv::initBuiltInWorkgroupSize>(); #else return {}; #endif @@ -186,7 +186,7 @@ template class nd_item { __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020") id get_offset() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initGlobalOffset>(); + return __spirv::initBuiltInGlobalOffset>(); #else return {}; #endif @@ -544,7 +544,7 @@ template class nd_item { id get_group_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv::initWorkgroupId>(); + return __spirv::initBuiltInWorkgroupId>(); #else return {}; #endif diff --git a/sycl/include/sycl/stl_wrappers/assert.h b/sycl/include/sycl/stl_wrappers/assert.h index f12971723ddf7..25e94c5b8f910 100644 --- a/sycl/include/sycl/stl_wrappers/assert.h +++ b/sycl/include/sycl/stl_wrappers/assert.h @@ -35,11 +35,13 @@ __devicelib_assert_fail(const char *, const char *, int32_t, const char *, #else #define assert(e) \ ((e) ? 
void(0) \ - : __devicelib_assert_fail( \ - #e, __FILE__, __LINE__, nullptr, __spirv_GlobalInvocationId_x(), \ - __spirv_GlobalInvocationId_y(), __spirv_GlobalInvocationId_z(), \ - __spirv_LocalInvocationId_x(), __spirv_LocalInvocationId_y(), \ - __spirv_LocalInvocationId_z())) + : __devicelib_assert_fail(#e, __FILE__, __LINE__, nullptr, \ + __spirv_BuiltInGlobalInvocationId(0), \ + __spirv_BuiltInGlobalInvocationId(1), \ + __spirv_BuiltInGlobalInvocationId(2), \ + __spirv_BuiltInLocalInvocationId(0), \ + __spirv_BuiltInLocalInvocationId(1), \ + __spirv_BuiltInLocalInvocationId(2))) #endif #endif #endif diff --git a/sycl/include/sycl/stl_wrappers/cassert b/sycl/include/sycl/stl_wrappers/cassert index ae63b0cc3281c..6ee3223beed45 100644 --- a/sycl/include/sycl/stl_wrappers/cassert +++ b/sycl/include/sycl/stl_wrappers/cassert @@ -35,10 +35,10 @@ __devicelib_assert_fail(const char *, const char *, int32_t, const char *, #define assert(e) \ ((e) ? void(0) \ : __devicelib_assert_fail( \ - #e, __FILE__, __LINE__, nullptr, __spirv_GlobalInvocationId_x(), \ - __spirv_GlobalInvocationId_y(), __spirv_GlobalInvocationId_z(), \ - __spirv_LocalInvocationId_x(), __spirv_LocalInvocationId_y(), \ - __spirv_LocalInvocationId_z())) + #e, __FILE__, __LINE__, nullptr, __spirv_BuiltInGlobalInvocationId(0), \ + __spirv_BuiltInGlobalInvocationId(1), __spirv_BuiltInGlobalInvocationId(2), \ + __spirv_BuiltInLocalInvocationId(0), __spirv_BuiltInLocalInvocationId(1), \ + __spirv_BuiltInLocalInvocationId(2))) #endif #endif #endif diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index 005b7746539f8..1cbed2091924a 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -146,7 +146,7 @@ struct sub_group { id_type get_local_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_SubgroupLocalInvocationId(); + return __spirv_BuiltInSubgroupLocalInvocationId(); #else throw sycl::exception(make_error_code(errc::feature_not_supported), 
"Sub-groups are not supported on host."); @@ -164,7 +164,7 @@ struct sub_group { range_type get_local_range() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_SubgroupSize(); + return __spirv_BuiltInSubgroupSize(); #else throw sycl::exception(make_error_code(errc::feature_not_supported), "Sub-groups are not supported on host."); @@ -173,7 +173,7 @@ struct sub_group { range_type get_max_local_range() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_SubgroupMaxSize(); + return __spirv_BuiltInSubgroupMaxSize(); #else throw sycl::exception(make_error_code(errc::feature_not_supported), "Sub-groups are not supported on host."); @@ -182,7 +182,7 @@ struct sub_group { id_type get_group_id() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_SubgroupId(); + return __spirv_BuiltInSubgroupId(); #else throw sycl::exception(make_error_code(errc::feature_not_supported), "Sub-groups are not supported on host."); @@ -200,7 +200,7 @@ struct sub_group { range_type get_group_range() const { #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_NumSubgroups(); + return __spirv_BuiltInNumSubgroups(); #else throw sycl::exception(make_error_code(errc::feature_not_supported), "Sub-groups are not supported on host."); diff --git a/sycl/test/check_device_code/esimd/spirv_intrins_trans.cpp b/sycl/test/check_device_code/esimd/spirv_intrins_trans.cpp index a5e6e59d21c04..7477916cb557b 100644 --- a/sycl/test/check_device_code/esimd/spirv_intrins_trans.cpp +++ b/sycl/test/check_device_code/esimd/spirv_intrins_trans.cpp @@ -12,8 +12,8 @@ SYCL_ESIMD_KERNEL SYCL_EXTERNAL void kernel_SubgroupLocalInvocationId(size_t *DoNotOptimize, uint32_t *DoNotOptimize32) { - DoNotOptimize[0] = __spirv_SubgroupLocalInvocationId(); - DoNotOptimize32[0] = __spirv_SubgroupLocalInvocationId() + 3; + DoNotOptimize[0] = __spirv_BuiltInSubgroupLocalInvocationId(); + DoNotOptimize32[0] = __spirv_BuiltInSubgroupLocalInvocationId() + 3; // CHECK-LABEL: @{{.*}}kernel_SubgroupLocalInvocationId // CHECK: [[ZEXT0:%.*]] = zext 
i32 0 to i64 // CHECK: store i64 [[ZEXT0]] @@ -22,8 +22,8 @@ kernel_SubgroupLocalInvocationId(size_t *DoNotOptimize, SYCL_ESIMD_KERNEL SYCL_EXTERNAL void kernel_SubgroupSize(size_t *DoNotOptimize, uint32_t *DoNotOptimize32) { - DoNotOptimize[0] = __spirv_SubgroupSize(); - DoNotOptimize32[0] = __spirv_SubgroupSize() + 7; + DoNotOptimize[0] = __spirv_BuiltInSubgroupSize(); + DoNotOptimize32[0] = __spirv_BuiltInSubgroupSize() + 7; // CHECK-LABEL: @{{.*}}kernel_SubgroupSize // CHECK: [[ZEXT0:%.*]] = zext i32 1 to i64 // CHECK: store i64 [[ZEXT0]] @@ -32,8 +32,8 @@ kernel_SubgroupSize(size_t *DoNotOptimize, uint32_t *DoNotOptimize32) { SYCL_ESIMD_KERNEL SYCL_EXTERNAL void kernel_SubgroupMaxSize(size_t *DoNotOptimize, uint32_t *DoNotOptimize32) { - DoNotOptimize[0] = __spirv_SubgroupMaxSize(); - DoNotOptimize32[0] = __spirv_SubgroupMaxSize() + 9; + DoNotOptimize[0] = __spirv_BuiltInSubgroupMaxSize(); + DoNotOptimize32[0] = __spirv_BuiltInSubgroupMaxSize() + 9; // CHECK-LABEL: @{{.*}}kernel_SubgroupMaxSize // CHECK: [[ZEXT0:%.*]] = zext i32 1 to i64 // CHECK: store i64 [[ZEXT0]] diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp index ad81d1db1fe0b..40a0cfab2c601 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp @@ -51,31 +51,31 @@ int main() { // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel17(){{.*}} #[[SGSizeAttr2]] Q.parallel_for(R3, {Ev}, Props, [](sycl::id<3>) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel18{{.*}}{{.*}} #[[SGSizeAttr3:[0-9]+]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel18{{.*}}{{.*}} #[[SGSizeAttr2:[0-9]+]] Q.parallel_for(R1, Props, Redu1, [](sycl::id<1>, auto &) {}); - // CHECK-IR: spir_kernel void 
@{{.*}}MainKrn{{.*}}SGSizeKernel19{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel19{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R1, Ev, Props, Redu1, [](sycl::id<1>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel20{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel20{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R1, {Ev}, Props, Redu1, [](sycl::id<1>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel21{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel21{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R2, Props, Redu1, [](sycl::id<2>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel22{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel22{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R2, Ev, Props, Redu1, [](sycl::id<2>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel23{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel23{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R2, {Ev}, Props, Redu1, [](sycl::id<2>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel24{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel24{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R3, Props, Redu1, [](sycl::id<3>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel25{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel25{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R3, Ev, Props, Redu1, [](sycl::id<3>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel26{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel26{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(R3, {Ev}, Props, 
Redu1, [](sycl::id<3>, auto &) {}); @@ -104,59 +104,59 @@ int main() { Q.parallel_for(NDR3, {Ev}, Props, [](sycl::nd_item<3>) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel36{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel36{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR1, Props, Redu1, [](sycl::nd_item<1>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel37{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel37{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR1, Ev, Props, Redu1, [](sycl::nd_item<1>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel38{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel38{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR1, {Ev}, Props, Redu1, [](sycl::nd_item<1>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel39{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel39{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR2, Props, Redu1, [](sycl::nd_item<2>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel40{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel40{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR2, Ev, Props, Redu1, [](sycl::nd_item<2>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel41{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel41{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR2, {Ev}, Props, Redu1, [](sycl::nd_item<2>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel42{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel42{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR3, Props, Redu1, [](sycl::nd_item<3>, auto &) {}); - // CHECK-IR: 
spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel43{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel43{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR3, Ev, Props, Redu1, [](sycl::nd_item<3>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel44{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel44{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR3, {Ev}, Props, Redu1, [](sycl::nd_item<3>, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel45{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel45{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR1, Props, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel46{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel46{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR1, Ev, Props, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel47{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel47{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR1, {Ev}, Props, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel48{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel48{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR2, Props, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel49{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel49{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR2, Ev, Props, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel50{{.*}}{{.*}} 
#[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel50{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR2, {Ev}, Props, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel51{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel51{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR3, Props, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel52{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel52{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR3, Ev, Props, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel53{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel53{{.*}}{{.*}} #[[SGSizeAttr2]] Q.parallel_for(NDR3, {Ev}, Props, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); @@ -186,17 +186,17 @@ int main() { CGH.parallel_for(R3, Props, [](sycl::id<3>) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel60{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel60{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for(R1, Props, Redu1, [](sycl::id<1>, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel61{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel61{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for(R2, Props, Redu1, [](sycl::id<2>, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel62{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel62{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for(R3, Props, Redu1, [](sycl::id<3>, 
auto &) {}); @@ -218,53 +218,53 @@ int main() { [](sycl::nd_item<3>) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel66{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel66{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for(NDR1, Props, Redu1, [](sycl::nd_item<1>, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel67{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel67{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for(NDR2, Props, Redu1, [](sycl::nd_item<2>, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel68{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel68{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for(NDR3, Props, Redu1, [](sycl::nd_item<3>, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel69{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel69{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for( NDR1, Props, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel70{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel70{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for( NDR2, Props, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel71{{.*}}{{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}SGSizeKernel71{{.*}}{{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for( NDR3, Props, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel72(){{.*}} 
#[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel72(){{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R1, Props, [](sycl::group<1> G) { G.parallel_for_work_item([&](sycl::h_item<1>) {}); }); }); - // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel73(){{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel73(){{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R2, Props, [](sycl::group<2> G) { G.parallel_for_work_item([&](sycl::h_item<2>) {}); }); }); - // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel74(){{.*}} #[[SGSizeAttr3]] + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel74(){{.*}} #[[SGSizeAttr2]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R3, Props, [](sycl::group<3> G) { @@ -277,4 +277,3 @@ int main() { // CHECK-IR: attributes #[[SGSizeAttr1]] = { {{.*}}"sycl-sub-group-size"="1" // CHECK-IR: attributes #[[SGSizeAttr2]] = { {{.*}}"sycl-sub-group-size"="1" -// CHECK-IR: attributes #[[SGSizeAttr3]] = { {{.*}}"sycl-sub-group-size"="1" diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp index 63280fcc638f3..7ca3aac5cfc8c 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp @@ -266,21 +266,21 @@ int main() { NDR3, Props3, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel72(){{.*}} #[[WGSizeAttr7]] + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel72(){{.*}} #[[WGSizeAttr4]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R1, Props1, [](sycl::group<1> G) { G.parallel_for_work_item([&](sycl::h_item<1>) {}); }); }); - // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel73(){{.*}} 
#[[WGSizeAttr8]] + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel73(){{.*}} #[[WGSizeAttr5]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R2, Props2, [](sycl::group<2> G) { G.parallel_for_work_item([&](sycl::h_item<2>) {}); }); }); - // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel74(){{.*}} #[[WGSizeAttr9]] + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel74(){{.*}} #[[WGSizeAttr6]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R3, Props3, [](sycl::group<3> G) { @@ -297,6 +297,3 @@ int main() { // CHECK-IR: attributes #[[WGSizeAttr4]] = { {{.*}}"sycl-work-group-size"="1" // CHECK-IR: attributes #[[WGSizeAttr5]] = { {{.*}}"sycl-work-group-size"="1,2" // CHECK-IR: attributes #[[WGSizeAttr6]] = { {{.*}}"sycl-work-group-size"="1,2,3" -// CHECK-IR: attributes #[[WGSizeAttr7]] = { {{.*}}"sycl-work-group-size"="1" -// CHECK-IR: attributes #[[WGSizeAttr8]] = { {{.*}}"sycl-work-group-size"="1,2" -// CHECK-IR: attributes #[[WGSizeAttr9]] = { {{.*}}"sycl-work-group-size"="1,2,3" diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp index a0bae31ad8004..e667dd4ead471 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp @@ -275,21 +275,21 @@ int main() { NDR3, Props3, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); }); - // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel72(){{.*}} #[[WGSizeHintAttr7]] + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel72(){{.*}} #[[WGSizeHintAttr4]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R1, Props1, [](sycl::group<1> G) { G.parallel_for_work_item([&](sycl::h_item<1>) {}); }); }); - // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel73(){{.*}} #[[WGSizeHintAttr8]] + 
// CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel73(){{.*}} #[[WGSizeHintAttr5]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R2, Props2, [](sycl::group<2> G) { G.parallel_for_work_item([&](sycl::h_item<2>) {}); }); }); - // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel74(){{.*}} #[[WGSizeHintAttr9]] + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel74(){{.*}} #[[WGSizeHintAttr6]] Q.submit([&](sycl::handler &CGH) { CGH.parallel_for_work_group( R3, Props3, [](sycl::group<3> G) { @@ -306,6 +306,3 @@ int main() { // CHECK-IR: attributes #[[WGSizeHintAttr4]] = { {{.*}}"sycl-work-group-size-hint"="1" // CHECK-IR: attributes #[[WGSizeHintAttr5]] = { {{.*}}"sycl-work-group-size-hint"="1,2" // CHECK-IR: attributes #[[WGSizeHintAttr6]] = { {{.*}}"sycl-work-group-size-hint"="1,2,3" -// CHECK-IR: attributes #[[WGSizeHintAttr7]] = { {{.*}}"sycl-work-group-size-hint"="1" -// CHECK-IR: attributes #[[WGSizeHintAttr8]] = { {{.*}}"sycl-work-group-size-hint"="1,2" -// CHECK-IR: attributes #[[WGSizeHintAttr9]] = { {{.*}}"sycl-work-group-size-hint"="1,2,3" diff --git a/sycl/test/check_device_code/group_barrier.cpp b/sycl/test/check_device_code/group_barrier.cpp index 9789cee70f545..e16f860288bd7 100644 --- a/sycl/test/check_device_code/group_barrier.cpp +++ b/sycl/test/check_device_code/group_barrier.cpp @@ -1,3 +1,5 @@ + + // RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -S -Xclang -emit-llvm -Xclang -no-enable-noundef-analysis %s -o - | FileCheck %s #include @@ -47,9 +49,9 @@ SYCL_EXTERNAL void test_3d(sycl::nd_item<3> item) { // CHECK: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 1, i32 912) // CHECK: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 0, i32 912) -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 912) -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 4, i32 912) -// CHECK: tail call spir_func void 
@_Z22__spirv_ControlBarrieriii(i32 2, i32 3, i32 912) -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 912) -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 1, i32 912) -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 0, i32 912) +// CHECK: call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 912) +// CHECK: call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 4, i32 912) +// CHECK: call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 3, i32 912) +// CHECK: call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 912) +// CHECK: call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 1, i32 912) +// CHECK: call spir_func void @_Z22__spirv_ControlBarrieriii(i32 2, i32 0, i32 912) diff --git a/sycl/test/check_device_code/group_load.cpp b/sycl/test/check_device_code/group_load.cpp index e45e518c71a8b..9b44eaef4ae2d 100644 --- a/sycl/test/check_device_code/group_load.cpp +++ b/sycl/test/check_device_code/group_load.cpp @@ -52,22 +52,22 @@ namespace blocked { // CHECK-GLOBAL-LABEL: @_ZN7blocked10test_naiveERN4sycl3_V19sub_groupEPU3AS1iRi( // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7:[0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META7:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) 
[[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10:![0-9]+]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked10test_naiveERN4sycl3_V19sub_groupEPU3AS3iRi( // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6:[0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META7:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK-LOCAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10:![0-9]+]] +// CHECK-LOCAL-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void // @@ -94,11 +94,11 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, 
plain_ptr p, int &out) { // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META14:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: @@ -132,11 +132,11 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, 
plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META17:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: @@ -162,16 +162,16 @@ using accessor_iter_t = local_accessor::iterator; // CHECK-GLOBAL-LABEL: 
@_ZN7blocked18test_accessor_iterERN4sycl3_V19sub_groupERNS1_6detail17accessor_iteratorIKiLi1EEERi( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA14:![0-9]+]] // CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_2_0_ITER_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[ITER]], i64 8 -// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr addrspace(4) [[AGG_TMP1_SROA_2_0_ITER_SROA_IDX]], align 8, !tbaa [[TBAA18:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr addrspace(4) [[AGG_TMP1_SROA_2_0_ITER_SROA_IDX]], align 8, !tbaa [[TBAA17:![0-9]+]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[CONV3_I_I_I:%.*]] = sext i32 [[TMP0]] to i64 -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] -// CHECK-GLOBAL-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP1]], i64 [[CONV3_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META19:![0-9]+]] +// CHECK-GLOBAL-NEXT: 
[[CONV3_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP0]], i64 [[CONV3_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // @@ -180,11 +180,11 @@ using accessor_iter_t = local_accessor::iterator; // CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 // CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span", align 8 // CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.15", align 1 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA20:![0-9]+]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) -// CHECK-LOCAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA18:![0-9]+]] +// CHECK-LOCAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA23:![0-9]+]] // CHECK-LOCAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm1ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEESP_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.15") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) @@ -208,7 +208,7 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP2_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP14]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) -// CHECK-GLOBAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP2_I]], align 8, !tbaa [[TBAA21:![0-9]+]] +// CHECK-GLOBAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP2_I]], align 8, !tbaa [[TBAA22:![0-9]+]] // CHECK-GLOBAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm1ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP14]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP2_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP14]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) @@ -221,11 +221,11 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 // CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span", align 8 // CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.7", align 1 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) -// CHECK-LOCAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], 
ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA18]] +// CHECK-LOCAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA23]] // CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm1ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEESR_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) @@ -249,11 +249,11 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META30:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: 
[[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA25:![0-9]+]] -// CHECK-GLOBAL-NEXT: store i8 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 1, !tbaa [[TBAA25]] +// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA33:![0-9]+]] +// CHECK-GLOBAL-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 1, !tbaa [[TBAA33]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: @@ -273,11 +273,11 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META37:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // 
CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA23:![0-9]+]] -// CHECK-LOCAL-NEXT: store i8 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 1, !tbaa [[TBAA23]] +// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA40:![0-9]+]] +// CHECK-LOCAL-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 1, !tbaa [[TBAA40]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3CCNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: @@ -296,7 +296,7 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: @_ZN7blocked16test_four_shortsERN4sycl3_V19sub_groupEPU3AS1sNS1_4spanIsLm4EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA27:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA34:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -306,8 +306,6 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label 
[[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META29:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -315,14 +313,16 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META36:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// 
CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA39:![0-9]+]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA39]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] 
@@ -335,7 +335,7 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // // CHECK-LOCAL-LABEL: @_ZN7blocked16test_four_shortsERN4sycl3_V19sub_groupEPU3AS3sNS1_4spanIsLm4EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA25:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA41:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -345,8 +345,6 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META27:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -354,14 +352,16 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META43:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA46:![0-9]+]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: store i16 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA46]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] @@ -381,11 +381,9 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7blocked21test_non_power_of_twoERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm3EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META37:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = 
mul i32 [[TMP2]], 3 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -393,25 +391,25 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META43:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 3 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// 
CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked21test_non_power_of_twoERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm3EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META35:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 3 // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -419,14 +417,16 @@ SYCL_EXTERNAL 
void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META50:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 3 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK-LOCAL-NEXT: br label 
[[FOR_COND_I_I_I]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -439,11 +439,9 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7blocked14test_four_intsERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm4EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META42:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -451,25 +449,25 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META47:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked14test_four_intsERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm4EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META40:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -477,14 +475,16 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META54:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -498,11 +498,9 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7blocked15test_seven_intsERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm7EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META47:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -510,25 +508,25 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM7ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META51:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 7 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked15test_seven_intsERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm7EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META45:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -536,14 +534,16 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM7ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META58:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 7 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm7ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -561,11 +561,9 @@ namespace striped { // CHECK-GLOBAL-LABEL: @_ZN7striped10test_naiveERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm2EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META52:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META55:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] @@ -573,26 +571,26 @@ namespace striped { // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEESQ__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META55:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META58:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I]], [[I_0_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I]], [[MUL_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw 
nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP58:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_SN_NS0_4spanISO_XT2_EEESQ_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped10test_naiveERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm2EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META50:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META53:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] @@ -600,15 +598,17 @@ namespace striped { // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEESQ__EXIT:%.*]] // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META62:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META65:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I]], [[I_0_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I]], [[MUL_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 
[[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP56:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_SN_NS0_4spanISO_XT2_EEESQ_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -621,7 +621,7 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7striped14test_optimizedERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm2EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -631,7 +631,7 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, // // CHECK-LOCAL-LABEL: @_ZN7striped14test_optimizedERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm2EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail 
call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -641,8 +641,6 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META61:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -650,15 +648,17 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], 
!noalias [[META69:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META72:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP64:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3iiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, 
i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] @@ -693,11 +693,12 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META76:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META79:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) 
[[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: @@ -722,15 +723,13 @@ using accessor_iter_t = local_accessor::iterator; // CHECK-GLOBAL-LABEL: @_ZN7striped18test_accessor_iterERN4sycl3_V19sub_groupERNS1_6detail17accessor_iteratorIKiLi1EEENS1_4spanIiLm2EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_2_0_ITER_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[ITER]], i64 8 -// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr addrspace(4) [[AGG_TMP1_SROA_2_0_ITER_SROA_IDX]], align 8, !tbaa [[TBAA18]] -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr addrspace(4) [[AGG_TMP1_SROA_2_0_ITER_SROA_IDX]], align 8, !tbaa [[TBAA17]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) 
#[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META62:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META65:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -738,13 +737,15 @@ using accessor_iter_t = local_accessor::iterator; // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSD_INS3_14FULL_GROUP_KEYEJEEENSD_INSB_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META62:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 
@_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META65:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[CONV3_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP4]], i64 [[CONV3_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP2]], i64 [[CONV3_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSD_INS3_14full_group_keyEJEEENSD_INSB_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: @@ -756,12 +757,12 @@ using accessor_iter_t = local_accessor::iterator; // CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 // CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca 
%"class.sycl::_V1::span.22", align 8 // CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.30", align 1 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA20]] +// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) -// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEESP_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.30") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) @@ 
-780,13 +781,13 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 // CHECK-GLOBAL-NEXT: [[AGG_TMP3_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 // CHECK-GLOBAL-NEXT: [[AGG_TMP15:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 80, ptr nonnull [[AGG_TMP15]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP2_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP15]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) -// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP2_I]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP2_I]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull 
byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP15]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP2_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP15]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) @@ -799,12 +800,12 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 // CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 // CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA20]] +// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) -// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEESR_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) @@ -830,8 +831,6 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META89:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META92:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -839,13 +838,15 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label 
[[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META89:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META92:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA25]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA33]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 1, !tbaa [[TBAA25]] +// 
CHECK-GLOBAL-NEXT: store i8 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 1, !tbaa [[TBAA33]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP95:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: @@ -860,7 +861,7 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // // CHECK-LOCAL-LABEL: @_ZN7striped24test_runtime_align_checkERN4sycl3_V19sub_groupEPU3AS3cNS1_4spanIcLm2EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA101:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA116:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -870,8 +871,6 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META103:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] 
= load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META106:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -879,15 +878,17 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3CCLM2ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META118:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META121:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] 
-// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA23]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA40]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 1, !tbaa [[TBAA23]] +// CHECK-LOCAL-NEXT: store i8 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 1, !tbaa [[TBAA40]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP109:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP124:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ccLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3CCLM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] @@ -908,7 +909,7 @@ 
SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: @_ZN7striped16test_four_shortsERN4sycl3_V19sub_groupEPU3AS1sNS1_4spanIsLm4EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA27]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA34]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -918,8 +919,6 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META97:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META100:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -927,15 +926,17 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META96:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META99:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA39]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP3]], 
ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA39]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP103:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] @@ -948,7 +949,7 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // // CHECK-LOCAL-LABEL: @_ZN7striped16test_four_shortsERN4sycl3_V19sub_groupEPU3AS3sNS1_4spanIsLm4EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA25]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA41]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: 
[[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -958,8 +959,6 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META111:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META114:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -967,15 +966,17 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call 
spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META125:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META128:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA46]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: store i16 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA46]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP117:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP131:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail 
call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] @@ -995,7 +996,7 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7striped19test_sixteen_shortsERN4sycl3_V19sub_groupEPU3AS1sNS1_4spanIsLm16EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA27]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA34]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -1005,8 +1006,6 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META105:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META108:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // 
CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1014,15 +1013,17 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1SSLM16ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META103:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META106:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(1) 
[[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA39]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA39]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP111:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP109:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ssLm16ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] @@ -1035,7 +1036,7 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // // CHECK-LOCAL-LABEL: 
@_ZN7striped19test_sixteen_shortsERN4sycl3_V19sub_groupEPU3AS3sNS1_4spanIsLm16EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA25]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA41]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -1045,8 +1046,6 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META119:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META122:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1054,15 +1053,17 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3SSLM16ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META132:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META135:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA46]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: store i16 [[TMP3]], ptr 
addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA46]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP125:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP138:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ssLm16ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] @@ -1080,11 +1081,9 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7striped21test_non_power_of_twoERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm3EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: 
tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META113:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META116:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1092,26 +1091,26 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META110:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META113:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// 
CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP119:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP116:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped21test_non_power_of_twoERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm3EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// 
CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META127:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META130:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1119,15 +1118,17 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META139:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 
@_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META142:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP133:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP145:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -1140,11 +1141,9 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, 
plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7striped17test_sixteen_intsERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm16EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META124:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1152,26 +1151,26 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// 
CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META117:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META120:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP127:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP123:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped17test_sixteen_intsERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm16EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META135:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1179,15 +1178,17 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META146:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META149:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], 
align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP141:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP152:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -1201,11 +1202,9 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-LABEL: @_ZN7striped16test_eleven_intsERN4sycl3_V19sub_groupEPU3AS1iNS1_4spanIiLm11EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA14]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META129:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META132:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 
0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1213,26 +1212,26 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM11ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META124:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META127:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr 
inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP135:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP130:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped16test_eleven_intsERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm11EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META143:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias 
[[META146:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1240,15 +1239,17 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM11ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR6]], !noalias [[META153:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR6]], !noalias [[META156:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = 
load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP149:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP159:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm11ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void diff --git a/sycl/test/check_device_code/group_load_store_alignment.cpp b/sycl/test/check_device_code/group_load_store_alignment.cpp index 1de28486460d4..542280effcec0 100644 --- a/sycl/test/check_device_code/group_load_store_alignment.cpp +++ b/sycl/test/check_device_code/group_load_store_alignment.cpp @@ -30,7 +30,7 @@ using plain_ptr = typename sycl::detail::DecoratedType< // CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3:[0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = 
load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA8:![0-9]+]] @@ -75,7 +75,7 @@ SYCL_EXTERNAL void test_load_with_alignment_hint(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] // CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA14:![0-9]+]] diff --git a/sycl/test/check_device_code/group_load_store_native_key.cpp b/sycl/test/check_device_code/group_load_store_native_key.cpp index a30b89616cd1a..490d420a9d0f8 100644 --- a/sycl/test/check_device_code/group_load_store_native_key.cpp +++ b/sycl/test/check_device_code/group_load_store_native_key.cpp @@ -43,11 +43,11 @@ using plain_ptr = typename sycl::detail::DecoratedType< // CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: tail call spir_func void
@_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3:[0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR3]], !noalias [[META7:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK-LOCAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10:![0-9]+]] +// CHECK-LOCAL-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: ret void // @@ -73,11 +73,11 @@ SYCL_EXTERNAL void test_load(sycl::sub_group &sg, plain_ptr p, int &out) { // CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR3]], !noalias [[META14:![0-9]+]] +// CHECK-LOCAL-NEXT: 
[[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT:%.*]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: @@ -92,6 +92,7 @@ SYCL_EXTERNAL void test_load_native(sycl::sub_group &sg, plain_ptr p, group_load(sg, p, out, opt_blocked_native{}); } +// // CHECK-GLOBAL-LABEL: @_Z10test_storeRN4sycl3_V19sub_groupEiPU3AS1i( // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null @@ -102,8 +103,8 @@ SYCL_EXTERNAL void test_load_native(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) 
@__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR3]], !noalias [[META7:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] // CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10:![0-9]+]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] @@ -119,10 +120,10 @@ SYCL_EXTERNAL void test_load_native(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR3]], !noalias [[META17:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 
3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: ret void // @@ -140,8 +141,8 @@ SYCL_EXTERNAL void test_store(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-GLOBAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR3]], !noalias [[META14:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] // CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] @@ -162,10 +163,10 @@ SYCL_EXTERNAL void test_store(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-LOCAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR3]], !noalias [[META20:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 
[[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: diff --git a/sycl/test/check_device_code/group_store.cpp b/sycl/test/check_device_code/group_store.cpp index 8a825222339ca..9fc99569eb54c 100644 --- a/sycl/test/check_device_code/group_store.cpp +++ b/sycl/test/check_device_code/group_store.cpp @@ -53,20 +53,20 @@ namespace blocked { // CHECK-GLOBAL-LABEL: @_ZN7blocked10test_naiveERN4sycl3_V19sub_groupEiPU3AS1i( // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7:[0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META7:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I]] to i64 // 
CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10:![0-9]+]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked10test_naiveERN4sycl3_V19sub_groupEiPU3AS3i( // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7:[0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META7:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10:![0-9]+]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void // @@ -81,14 +81,14 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 // CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 
15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META14:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: @@ -103,14 +103,14 @@ SYCL_EXTERNAL 
void test_naive(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META14:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: @@ -131,14 +131,14 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, int v, // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 // CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META17:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: 
store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: @@ -153,14 +153,14 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, int v, // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META17:![0-9]+]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[CALL_I_I_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = 
getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[V:%.*]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: @@ -181,29 +181,29 @@ using accessor_iter_t = accessor v, // CHECK-GLOBAL-LABEL: @_ZN7blocked22test_four_const_shortsERN4sycl3_V19sub_groupENS1_4spanIKsLm4EEEPU3AS1s( // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA39]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 // CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: 
br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META37:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS4_9NAIVE_KEYEJEEENSC_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I_I:%.*]] +// CHECK-GLOBAL-NEXT: [[CMP_I29_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I29_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS4_9NAIVE_KEYEJEEENSC_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I_I:%.*]] // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA41]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META50:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA41]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_9naive_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESU_NS0_4spanISS_XT1_EEEST_SV_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT:%.*]] @@ -424,51 +424,51 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-GLOBAL: for.cond.cleanup.i.i: -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1mm(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA48]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1mm(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP4]]) #[[ATTR7]] 
// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I30_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I30_I_I]], align 2, !tbaa [[TBAA41]] // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA41]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_.exit: // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked22test_four_const_shortsERN4sycl3_V19sub_groupENS1_4spanIKsLm4EEEPU3AS3s( // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA39]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META37:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 
// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS4_9NAIVE_KEYEJEEENSC_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I29_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I29_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS4_9NAIVE_KEYEJEEENSC_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I_I:%.*]] // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(4) 
[[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA41]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META50:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: store i16 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA41]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEKsLm4EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_9naive_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESU_NS0_4spanISS_XT1_EEEST_SV_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT:%.*]] @@ -480,18 +480,18 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-LOCAL: for.cond.cleanup.i.i: -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3mm(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA48]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3mm(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP4]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext 
nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I30_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I30_I_I]], align 2, !tbaa [[TBAA41]] // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA41]] // CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_.exit: // CHECK-LOCAL-NEXT: ret void // @@ -504,11 +504,9 @@ SYCL_EXTERNAL void test_four_const_shorts(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: @_ZN7blocked21test_non_power_of_twoERN4sycl3_V19sub_groupENS1_4spanIiLm3EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = 
inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META43:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 3 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -517,24 +515,24 @@ SYCL_EXTERNAL void test_four_const_shorts(sycl::sub_group &sg, // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META55:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 3 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br 
label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked21test_non_power_of_twoERN4sycl3_V19sub_groupENS1_4spanIiLm3EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META43:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 3 // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -543,13 +541,15 @@ SYCL_EXTERNAL void test_four_const_shorts(sycl::sub_group &sg, // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] 
= getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META55:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 3 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -562,11 +562,9 @@ SYCL_EXTERNAL void 
test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7blocked14test_four_intsERN4sycl3_V19sub_groupENS1_4spanIiLm4EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META48:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -575,24 +573,24 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META59:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: 
[[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP51:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked14test_four_intsERN4sycl3_V19sub_groupENS1_4spanIiLm4EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META48:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-LOCAL-NEXT: br label 
[[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -601,13 +599,15 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META59:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[CALL_I_I_I_I_I_I]], 2 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP51:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -621,11 +621,9 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7blocked15test_seven_intsERN4sycl3_V19sub_groupENS1_4spanIiLm7EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META53:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -634,24 +632,24 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, span v, // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 
[[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META63:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 7 // CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP56:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked15test_seven_intsERN4sycl3_V19sub_groupENS1_4spanIiLm7EEEPU3AS3i( // 
CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META53:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -660,13 +658,15 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, span v, // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META63:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I_I_I_I_I]], 7 // CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 
4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP56:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -681,11 +681,9 @@ SYCL_EXTERNAL void test_seven_ints(sycl::sub_group &sg, span v, namespace striped { // CHECK-GLOBAL-LABEL: @_ZN7striped10test_naiveERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META61:![0-9]+]] // CHECK-GLOBAL-NEXT: br label 
[[FOR_COND_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] @@ -694,25 +692,25 @@ namespace striped { // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META67:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META70:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I]], [[I_0_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I]], [[MUL_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP64:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped10test_naiveERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META61:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] @@ -721,14 +719,16 @@ namespace striped { // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] 
-// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META67:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META70:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[CALL_I_I2_I_I_I]], [[I_0_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I]], [[MUL_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP64:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: 
ret void @@ -741,116 +741,34 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped14test_optimizedERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-GLOBAL: if.then.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META66:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META69:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] -// CHECK-GLOBAL: for.body.i.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP72:![0-9]+]] -// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jDv2_j(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) 
#[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP73:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: +// CHECK-GLOBAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca 
%"class.sycl::_V1::span.22", align 8 +// CHECK-GLOBAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr addrspace(1) noundef [[P:%.*]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped14test_optimizedERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = 
load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-LOCAL: if.then.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META66:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META69:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] -// CHECK-LOCAL: for.body.i.i.i: -// 
CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP72:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jDv2_j(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = 
zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP73:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: +// CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] +// CHECK-LOCAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr addrspace(3) noundef [[P:%.*]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: ret void // SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, span v, @@ -861,116 +779,34 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped27test_contiguous_auto_detectERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 
[[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-GLOBAL: if.then.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META75:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META78:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT_I_I:%.*]] -// CHECK-GLOBAL: for.body.i.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// CHECK-GLOBAL-NEXT: 
[[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP81:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 
[[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jDv2_j(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP82:![0-9]+]] -// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: +// CHECK-GLOBAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-GLOBAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.30", align 1 +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr addrspace(1) noundef [[P:%.*]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.30") align 1 
[[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped27test_contiguous_auto_detectERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-LOCAL: if.then.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META75:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META78:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label 
[[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT_I_I:%.*]] -// CHECK-LOCAL: for.body.i.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP81:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit.i.i: -// CHECK-LOCAL-NEXT: 
tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] -// CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jDv2_j(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] 
-// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP82:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: +// CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.30", align 1 +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] +// CHECK-LOCAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr addrspace(3) noundef [[P:%.*]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.30") align 1 [[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: ret void // SYCL_EXTERNAL void test_contiguous_auto_detect(sycl::sub_group &sg, @@ -986,15 +822,13 @@ using accessor_iter_t = accessor v, // CHECK-GLOBAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 // CHECK-GLOBAL-NEXT: [[AGG_TMP3_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 // CHECK-GLOBAL-NEXT: [[AGG_TMP26:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 80, ptr nonnull [[AGG_TMP26]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-GLOBAL-NEXT: call void 
@llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP26]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) -// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP26]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP26]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) @@ -1078,13 +914,13 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 // CHECK-LOCAL-NEXT: [[AGG_TMP3_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 // CHECK-LOCAL-NEXT: [[AGG_TMP26:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load 
i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 80, ptr nonnull [[AGG_TMP26]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-LOCAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP26]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) -// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP26]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP26]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) @@ -1101,116 
+937,34 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: @_ZN7striped16test_four_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm4EEEPU3AS1s( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-GLOBAL: if.then.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META109:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] -// CHECK-GLOBAL: for.body.i.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP115:![0-9]+]] -// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv4_t(ptr addrspace(1) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) 
#[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP116:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: +// CHECK-GLOBAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca 
%"class.sycl::_V1::span.17", align 8 +// CHECK-GLOBAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA39]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA39]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.17") align 8 [[AGG_TMP1_I]], ptr addrspace(1) noundef [[P:%.*]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped16test_four_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm4EEEPU3AS3s( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = 
load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-LOCAL: if.then.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META109:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] -// CHECK-LOCAL: for.body.i.i.i: 
-// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP115:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv4_t(ptr addrspace(3) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = 
zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP116:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: +// CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.17", align 8 +// CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA39]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA39]] +// CHECK-LOCAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.17") align 8 [[AGG_TMP1_I]], ptr addrspace(3) noundef [[P:%.*]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: ret void // SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, @@ -1222,116 +976,34 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped19test_sixteen_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm16EEEPU3AS1s( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [16 x i16], align 2 -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] -// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 
0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-GLOBAL: if.then.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META118:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 16 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] -// CHECK-GLOBAL: for.body.i.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// 
CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP124:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// 
CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv16_t(ptr addrspace(1) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [16 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP125:![0-9]+]] -// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: +// CHECK-GLOBAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.32", align 8 +// CHECK-GLOBAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA39]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA39]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.32") align 8 [[AGG_TMP1_I]], ptr addrspace(1) noundef [[P:%.*]], ptr noundef nonnull 
byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped19test_sixteen_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm16EEEPU3AS3s( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [16 x i16], align 2 -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] -// CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK-LOCAL: if.then.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META118:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 
[[I_0_I_I_I]], 16 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] -// CHECK-LOCAL: for.body.i.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP124:![0-9]+]] -// CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm16EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv16_t(ptr addrspace(3) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] 
-// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [16 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP125:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: +// CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.32", align 
8 +// CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA39]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA39]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm16EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.32") align 8 [[AGG_TMP1_I]], ptr addrspace(3) noundef [[P:%.*]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: ret void // SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, span v, @@ -1341,11 +1013,9 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped21test_non_power_of_twoERN4sycl3_V19sub_groupENS1_4spanIiLm3EEEPU3AS1i( // 
CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META127:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META130:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1354,25 +1024,25 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META142:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META145:![0-9]+]] +// 
CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP133:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped21test_non_power_of_twoERN4sycl3_V19sub_groupENS1_4spanIiLm3EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 
noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META127:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META130:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1381,14 +1051,16 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, span v, // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META142:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META145:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr 
addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP133:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -1401,11 +1073,9 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped17test_sixteen_intsERN4sycl3_V19sub_groupENS1_4spanIiLm16EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META135:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] // 
CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1414,25 +1084,25 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META149:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META152:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP141:![0-9]+]] +// 
CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP155:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped17test_sixteen_intsERN4sycl3_V19sub_groupENS1_4spanIiLm16EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META135:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1441,14 +1111,16 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // 
CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META149:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META152:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP141:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP155:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -1462,11 +1134,9 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped16test_eleven_intsERN4sycl3_V19sub_groupENS1_4spanIiLm11EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META143:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META146:![0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i.i: // CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1475,25 +1145,25 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, span v, // CHECK-GLOBAL: for.body.i.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // 
CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META156:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META159:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP149:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP162:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped16test_eleven_intsERN4sycl3_V19sub_groupENS1_4spanIiLm11EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA20]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META143:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META146:![0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i.i: // CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -1502,14 +1172,16 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, span v, // CHECK-LOCAL: for.body.i.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 
[[CONV_I_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA10]] +// CHECK-LOCAL-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() #[[ATTR7]], !noalias [[META156:![0-9]+]] +// CHECK-LOCAL-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = tail call spir_func noundef i32 @_Z27__spirv_BuiltInSubgroupSizev() #[[ATTR7]], !noalias [[META159:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[CALL_I_I2_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[CALL_I_I_I_I_I_I]], [[MUL_I_I_I_I]] // CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 // CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: store i32 [[TMP2]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA10]] // CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP149:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP162:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void diff --git a/sycl/test/extensions/bindless_images_SPIRV_inst.cpp b/sycl/test/extensions/bindless_images_SPIRV_inst.cpp index dd91c773ea94b..b4fee75aa1de6 100644 --- a/sycl/test/extensions/bindless_images_SPIRV_inst.cpp +++ b/sycl/test/extensions/bindless_images_SPIRV_inst.cpp @@ -26,8 +26,9 @@ // 0 read only and 1 write only. // Arguments: TypeImage, Result, Sampled Type, Dim, Depth, Arrayed, MS, Sampled // and Image Format. 
-// CHECK: TypeImage [[IMAGETYPEREAD:[0-9]+]] 7 0 0 0 0 0 0 0 -// CHECK: TypeImage [[IMAGETYPEWRITE:[0-9]+]] 7 0 0 0 0 0 0 1 +// CHECK: 2 TypeVoid [[#TyVoid:]] +// CHECK: TypeImage [[IMAGETYPEREAD:[0-9]+]] [[#TyVoid]] 0 0 0 0 0 0 0 +// CHECK: TypeImage [[IMAGETYPEWRITE:[0-9]+]] [[#TyVoid]] 0 0 0 0 0 0 1 // Generate `Result Type` for samplers // Arguments: Result // CHECK: TypeSampler [[SAMPLERTYPE:[0-9]+]] diff --git a/sycl/test/include_deps/sycl_accessor.hpp.cpp b/sycl/test/include_deps/sycl_accessor.hpp.cpp index fcd8fe3e5e904..36c74a2fb4969 100644 --- a/sycl/test/include_deps/sycl_accessor.hpp.cpp +++ b/sycl/test/include_deps/sycl_accessor.hpp.cpp @@ -17,9 +17,9 @@ // CHECK-NEXT: stl_wrappers/cassert // CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: __spirv/spirv_vars.hpp +// CHECK-NEXT: detail/helpers.hpp // CHECK-NEXT: __spirv/spirv_types.hpp // CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/helpers.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: detail/iostream_proxy.hpp // CHECK-NEXT: detail/is_device_copyable.hpp diff --git a/sycl/test/include_deps/sycl_buffer.hpp.cpp b/sycl/test/include_deps/sycl_buffer.hpp.cpp index 0a821e1fe79c0..3799080a01298 100644 --- a/sycl/test/include_deps/sycl_buffer.hpp.cpp +++ b/sycl/test/include_deps/sycl_buffer.hpp.cpp @@ -16,9 +16,9 @@ // CHECK-NEXT: stl_wrappers/cassert // CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: __spirv/spirv_vars.hpp +// CHECK-NEXT: detail/helpers.hpp // CHECK-NEXT: __spirv/spirv_types.hpp // CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/helpers.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: detail/iostream_proxy.hpp // CHECK-NEXT: detail/is_device_copyable.hpp diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 5c91a32bbdb04..972f4f98080ba 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -18,9 +18,9 @@ // CHECK-NEXT: stl_wrappers/cassert // 
CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: __spirv/spirv_vars.hpp +// CHECK-NEXT: detail/helpers.hpp // CHECK-NEXT: __spirv/spirv_types.hpp // CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/helpers.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: detail/iostream_proxy.hpp // CHECK-NEXT: detail/is_device_copyable.hpp From e4cf7052a9854d9bb10b3ba126f77930f7936206 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Thu, 10 Jul 2025 01:18:24 -0700 Subject: [PATCH 2/7] fix test build errors --- .../include/libspirv/workitem/get_local_linear_id.h | 2 +- libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl | 2 +- libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl | 10 +++++----- libdevice/sanitizer/asan_rtl.cpp | 2 +- libdevice/sanitizer/msan_rtl.cpp | 2 +- llvm/lib/SYCLLowerIR/LowerWGScope.cpp | 2 +- llvm/test/SYCLLowerIR/convergent.ll | 8 ++++---- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/libclc/libspirv/include/libspirv/workitem/get_local_linear_id.h b/libclc/libspirv/include/libspirv/workitem/get_local_linear_id.h index dff7923b9d31c..5605c19c5c641 100644 --- a/libclc/libspirv/include/libspirv/workitem/get_local_linear_id.h +++ b/libclc/libspirv/include/libspirv/workitem/get_local_linear_id.h @@ -6,4 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL _CLC_OVERLOAD size_t __spirv_LocalInvocationIndex(); +_CLC_DECL _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationIndex(); diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl b/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl index 4a25ee769b599..5296170b9e445 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl @@ -348,7 +348,7 @@ long __clc__3d_to_linear_local_id(ulong3 id) { if (scope == Subgroup) { \ return __spirv_SubgroupShuffleINTEL(x, local_id); \ } \ - bool source = (__spirv_LocalInvocationIndex() == local_id); \ + bool source = 
(__spirv_BuiltInLocalInvocationIndex() == local_id); \ __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)(); \ if (source) { \ *scratch = x; \ diff --git a/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl b/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl index 64f0d8354f2cf..0c4b0983f884d 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl @@ -627,11 +627,11 @@ long __clc__3d_to_linear_local_id(ulong3 id) { #define __CLC_GROUP_BROADCAST(TYPE) \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, ulong local_id) { \ + int scope, TYPE x, ulong local_id) { \ if (scope == Subgroup) { \ return __clc__SubgroupShuffle(x, local_id); \ } \ - bool source = (__spirv_LocalInvocationIndex() == local_id); \ + bool source = (__spirv_BuiltInLocalInvocationIndex() == local_id); \ __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)(); \ if (source) { \ *scratch = x; \ @@ -642,17 +642,17 @@ long __clc__3d_to_linear_local_id(ulong3 id) { return result; \ } \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, ulong2 local_id) { \ + int scope, TYPE x, ulong2 local_id) { \ ulong linear_local_id = __clc__2d_to_linear_local_id(local_id); \ return __spirv_GroupBroadcast(scope, x, linear_local_id); \ } \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, ulong3 local_id) { \ + int scope, TYPE x, ulong3 local_id) { \ ulong linear_local_id = __clc__3d_to_linear_local_id(local_id); \ return __spirv_GroupBroadcast(scope, x, linear_local_id); \ } \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, uint local_id) { \ + int scope, TYPE x, uint local_id) { \ return __spirv_GroupBroadcast(scope, x, (ulong)local_id); \ } __CLC_GROUP_BROADCAST(char); diff --git a/libdevice/sanitizer/asan_rtl.cpp 
b/libdevice/sanitizer/asan_rtl.cpp index 8147a3cbe9628..920da98d4ea7b 100644 --- a/libdevice/sanitizer/asan_rtl.cpp +++ b/libdevice/sanitizer/asan_rtl.cpp @@ -918,7 +918,7 @@ __asan_set_private_base(__SYCL_PRIVATE__ void *ptr) { launch_info->PrivateBase == 0) return; // Only set on the first sub-group item - if (__spirv_BuiltInSubgroupLocalInvocationId == 0) { + if (__spirv_BuiltInSubgroupLocalInvocationId() == 0) { const size_t sid = SubGroupLinearId(); launch_info->PrivateBase[sid] = (uptr)ptr; ASAN_DEBUG(__spirv_ocl_printf(__asan_print_private_base, sid, ptr)); diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 7663f46129451..ec6ba96b5d1e7 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -722,7 +722,7 @@ __msan_set_private_base(__SYCL_PRIVATE__ void *ptr) { GetMsanLaunchInfo->PrivateBase == 0) return; // Only set on the first sub-group item - if (__spirv_BuiltInSubgroupLocalInvocationId == 0) { + if (__spirv_BuiltInSubgroupLocalInvocationId() == 0) { const size_t sid = SubGroupLinearId(); GetMsanLaunchInfo->PrivateBase[sid] = (uptr)ptr; MSAN_DEBUG(__spirv_ocl_printf(__msan_print_private_base, sid, ptr)); diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index b6be652a7b20a..7c4f4f93eb498 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -947,7 +947,7 @@ Value *spirv::genPseudoLocalID(Instruction &Before, const Triple &TT) { return Bld.CreateCall(Callee, {ConstantInt::get(ArgTy, Dim)}); }; - StringRef LocalInvocationIdName = "_Z33__spirv_BuiltInLocalInvocationIdi"; + StringRef LocalInvocationIdName = "_Z32__spirv_BuiltInLocalInvocationIdi"; Value *LocalInvocationIdX = CreateCallee(LocalInvocationIdName, 0); Value *LocalInvocationIdY = CreateCallee(LocalInvocationIdName, 1); Value *LocalInvocationIdZ = CreateCallee(LocalInvocationIdName, 2); diff --git a/llvm/test/SYCLLowerIR/convergent.ll 
b/llvm/test/SYCLLowerIR/convergent.ll index 81a4cf248cf87..d424f3695f110 100644 --- a/llvm/test/SYCLLowerIR/convergent.ll +++ b/llvm/test/SYCLLowerIR/convergent.ll @@ -9,14 +9,14 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" %struct.baz = type { i64 } define internal spir_func void @wibble(ptr byval(%struct.baz) %arg1) !work_group_scope !0 { -; CHECK-PTX: call i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32 0) -; CHECK-PTX: call i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32 1) -; CHECK-PTX: call i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32 2) +; CHECK-PTX: call i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 0) +; CHECK-PTX: call i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 1) +; CHECK-PTX: call i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 2) ; CHECK: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) ret void } -; CHECK-PTX: declare i64 @_Z33__spirv_BuiltInLocalInvocationIdi(i32) +; CHECK-PTX: declare i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32) ; CHECK: ; Function Attrs: convergent ; CHECK: declare void @_Z22__spirv_ControlBarrieriii(i32, i32, i32) #[[ATTR_NUM:[0-9]+]] From ef7c1a6f3717cfa7d5268eb9cd65699c44f132a9 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Thu, 10 Jul 2025 03:32:36 -0700 Subject: [PATCH 3/7] fix native_cpu tests --- .../lib/native_cpu/workitem/get_global_id.cl | 21 ++++++++++--------- libdevice/nativecpu_utils.cpp | 9 +++----- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl b/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl index 7236b41bba0d7..7e6d75dbc9998 100644 --- a/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl @@ -10,14 +10,15 @@ ulong __mux_get_global_id(int); -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_x() { - return __mux_get_global_id(0); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_y() { - return 
__mux_get_global_id(1); -} - -_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_z() { - return __mux_get_global_id(2); +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { + switch (dim) { + case 0: + return __mux_get_global_id(0); + case 1: + return __mux_get_global_id(1); + case 2: + return __mux_get_global_id(2); + default: + return 0; + } } diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 00d4178d2921c..312fee6424f1c 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -334,15 +334,12 @@ GEN_u32(__spirv_BuiltInSubgroupMaxSize, __mux_get_max_sub_group_size); GEN_u32(__spirv_BuiltInSubgroupId, __mux_get_sub_group_id); // I64_I32 -#define GEN_p(bname, muxname, arg) \ +#define GEN_p(bname, muxname) \ DEVICE_EXTERN_C GET_PROPS uint64_t muxname(uint32_t); \ - DEVICE_EXTERNAL GET_PROPS uint64_t bname() { return muxname(arg); } \ + DEVICE_EXTERNAL GET_PROPS uint64_t bname(int dim) { return muxname(dim); } \ static_assert(true) -#define GEN_xyz(bname, ncpu_name) \ - GEN_p(bname##_x, ncpu_name, 0); \ - GEN_p(bname##_y, ncpu_name, 1); \ - GEN_p(bname##_z, ncpu_name, 2) +#define GEN_xyz(bname, ncpu_name) GEN_p(bname, ncpu_name); GEN_xyz(__spirv_BuiltInGlobalOffset, __mux_get_global_offset); GEN_xyz(__spirv_BuiltInLocalInvocationId, __mux_get_local_id); From 3a4d563aae888c6bd929104ef997e018aa5cd7f2 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Fri, 18 Jul 2025 11:17:49 +0200 Subject: [PATCH 4/7] add a comment --- libdevice/spirv_vars.h | 2 ++ sycl/include/sycl/__spirv/spirv_vars.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index fad12e685c278..e0daefc0ef59b 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -15,6 +15,8 @@ #ifdef __SYCL_DEVICE_ONLY__ +// SPIR-V built-in variables mapped to function call. 
+ DEVICE_EXTERNAL size_t __spirv_BuiltInGlobalInvocationId(int); DEVICE_EXTERNAL size_t __spirv_BuiltInGlobalSize(int); DEVICE_EXTERNAL size_t __spirv_BuiltInGlobalOffset(int); diff --git a/sycl/include/sycl/__spirv/spirv_vars.hpp b/sycl/include/sycl/__spirv/spirv_vars.hpp index aa2c5d27a275d..1609539dd4c5e 100644 --- a/sycl/include/sycl/__spirv/spirv_vars.hpp +++ b/sycl/include/sycl/__spirv/spirv_vars.hpp @@ -15,6 +15,8 @@ #include // for size_t #include // for uint32_t +// SPIR-V built-in variables mapped to function call. + __DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInGlobalInvocationId(int); __DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInGlobalSize(int); __DPCPP_SYCL_EXTERNAL size_t __spirv_BuiltInGlobalOffset(int); From a66759ad05cca44da3f220a29cb66e2aba0c49b3 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Fri, 18 Jul 2025 13:56:37 +0200 Subject: [PATCH 5/7] simplify dim in libspirv functions --- .../amdgcn-amdhsa/workitem/get_global_size.cl | 13 ++---- .../amdgcn-amdhsa/workitem/get_local_size.cl | 13 ++---- .../amdgcn-amdhsa/workitem/get_num_groups.cl | 37 +++-------------- .../lib/generic/workitem/get_global_id.cl | 16 ++------ .../lib/generic/workitem/get_global_size.cl | 11 +---- .../lib/native_cpu/workitem/get_global_id.cl | 11 +---- .../native_cpu/workitem/get_global_size.cl | 11 +---- .../ptx-nvidiacl/workitem/get_global_id.cl | 41 ++++--------------- .../ptx-nvidiacl/workitem/get_global_size.cl | 11 +---- .../lib/r600/workitem/get_global_offset.cl | 21 +++------- 10 files changed, 35 insertions(+), 150 deletions(-) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl index 0096cb9873969..e7916b5772c19 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_size.cl @@ -25,19 +25,14 @@ __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr"); #endif _CLC_DEF _CLC_OVERLOAD size_t 
__spirv_BuiltInGlobalSize(int dim) { + CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr(); switch (dim) { - case 0: { - CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); + case 0: return ptr[3]; - } - case 1: { - CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); + case 1: return ptr[4]; - } - case 2: { - CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); + case 2: return ptr[5]; - } default: return 1; } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl index 29ecd8d099cbe..daed031cfb110 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_size.cl @@ -27,19 +27,14 @@ __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr"); // Mimic `EmitAMDGPUWorkGroupSize` in `clang/lib/CodeGen/CGBuiltin.cpp`. _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int dim) { + CONST_AS ushort *ptr = (CONST_AS ushort *)__dispatch_ptr(); switch (dim) { - case 0: { - CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr(); + case 0: return ptr[2]; - } - case 1: { - CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr(); + case 1: return ptr[3]; - } - case 2: { - CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr(); + case 2: return ptr[4]; - } default: return 1; } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl index 10aa90701f2b0..f62856ceb99b7 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_groups.cl @@ -9,35 +9,10 @@ #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { - switch (dim) { - case 0: { - size_t global_size = __spirv_BuiltInGlobalSize(0); - size_t local_size = __spirv_BuiltInWorkgroupSize(0); - size_t num_groups = global_size / local_size; - if 
(global_size % local_size != 0) { - num_groups++; - } - return num_groups; - } - case 1: { - size_t global_size = __spirv_BuiltInGlobalSize(1); - size_t local_size = __spirv_BuiltInWorkgroupSize(1); - size_t num_groups = global_size / local_size; - if (global_size % local_size != 0) { - num_groups++; - } - return num_groups; - } - case 2: { - size_t global_size = __spirv_BuiltInGlobalSize(2); - size_t local_size = __spirv_BuiltInWorkgroupSize(2); - size_t num_groups = global_size / local_size; - if (global_size % local_size != 0) { - num_groups++; - } - return num_groups; - } - default: - return 0; - } + size_t global_size = __spirv_BuiltInGlobalSize(dim); + size_t local_size = __spirv_BuiltInWorkgroupSize(dim); + size_t num_groups = global_size / local_size; + if (global_size % local_size != 0) + num_groups++; + return num_groups; } diff --git a/libclc/libspirv/lib/generic/workitem/get_global_id.cl b/libclc/libspirv/lib/generic/workitem/get_global_id.cl index 11f465e76a563..ff584a0d2153c 100644 --- a/libclc/libspirv/lib/generic/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/generic/workitem/get_global_id.cl @@ -9,17 +9,7 @@ #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { - switch (dim) { - case 0: - return __spirv_BuiltInWorkgroupId(0) * __spirv_BuiltInWorkgroupSize(0) + - __spirv_BuiltInLocalInvocationId(0) + __spirv_BuiltInGlobalOffset(0); - case 1: - return __spirv_BuiltInWorkgroupId(1) * __spirv_BuiltInWorkgroupSize(1) + - __spirv_BuiltInLocalInvocationId(1) + __spirv_BuiltInGlobalOffset(1); - case 2: - return __spirv_BuiltInWorkgroupId(2) * __spirv_BuiltInWorkgroupSize(2) + - __spirv_BuiltInLocalInvocationId(2) + __spirv_BuiltInGlobalOffset(2); - default: - return 0; - } + return __spirv_BuiltInWorkgroupId(dim) * __spirv_BuiltInWorkgroupSize(dim) + + __spirv_BuiltInLocalInvocationId(dim) + + __spirv_BuiltInGlobalOffset(dim); } diff --git a/libclc/libspirv/lib/generic/workitem/get_global_size.cl 
b/libclc/libspirv/lib/generic/workitem/get_global_size.cl index 708f0592487c0..d60719e81b143 100644 --- a/libclc/libspirv/lib/generic/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/generic/workitem/get_global_size.cl @@ -9,14 +9,5 @@ #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { - switch (dim) { - case 0: - return __spirv_BuiltInNumWorkgroups(0) * __spirv_BuiltInWorkgroupSize(0); - case 1: - return __spirv_BuiltInNumWorkgroups(1) * __spirv_BuiltInWorkgroupSize(1); - case 2: - return __spirv_BuiltInNumWorkgroups(2) * __spirv_BuiltInWorkgroupSize(2); - default: - return 1; - } + return __spirv_BuiltInNumWorkgroups(dim) * __spirv_BuiltInWorkgroupSize(dim); } diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl b/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl index 7e6d75dbc9998..6183c429c3b34 100644 --- a/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/native_cpu/workitem/get_global_id.cl @@ -11,14 +11,5 @@ ulong __mux_get_global_id(int); _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { - switch (dim) { - case 0: - return __mux_get_global_id(0); - case 1: - return __mux_get_global_id(1); - case 2: - return __mux_get_global_id(2); - default: - return 0; - } + return __mux_get_global_id(dim); } diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl b/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl index fa4afed4f44c3..0e932e21eabf7 100644 --- a/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/native_cpu/workitem/get_global_size.cl @@ -11,14 +11,5 @@ ulong __mux_get_global_size(int); _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { - switch (dim) { - case 0: - return __mux_get_global_size(0); - case 1: - return __mux_get_global_size(1); - case 2: - return __mux_get_global_size(2); - default: - return 1; - } + return __mux_get_global_size(dim); } diff --git 
a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl index 3747f85030043..d617971832405 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl @@ -11,38 +11,13 @@ extern int __nvvm_reflect_ocl(constant char *); _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { - switch (dim) { - case 0: { - if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__spirv_BuiltInWorkgroupId(0) * - (uint)__spirv_BuiltInWorkgroupSize(0) + - (uint)__spirv_BuiltInLocalInvocationId(0) + - (uint)__spirv_BuiltInGlobalOffset(0); - } - return __spirv_BuiltInWorkgroupId(0) * __spirv_BuiltInWorkgroupSize(0) + - __spirv_BuiltInLocalInvocationId(0) + __spirv_BuiltInGlobalOffset(0); - } - case 1: { - if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__spirv_BuiltInWorkgroupId(1) * - (uint)__spirv_BuiltInWorkgroupSize(1) + - (uint)__spirv_BuiltInLocalInvocationId(1) + - (uint)__spirv_BuiltInGlobalOffset(1); - } - return __spirv_BuiltInWorkgroupId(1) * __spirv_BuiltInWorkgroupSize(1) + - __spirv_BuiltInLocalInvocationId(1) + __spirv_BuiltInGlobalOffset(1); - } - case 2: { - if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__spirv_BuiltInWorkgroupId(2) * - (uint)__spirv_BuiltInWorkgroupSize(2) + - (uint)__spirv_BuiltInLocalInvocationId(2) + - (uint)__spirv_BuiltInGlobalOffset(2); - } - return __spirv_BuiltInWorkgroupId(2) * __spirv_BuiltInWorkgroupSize(2) + - __spirv_BuiltInLocalInvocationId(2) + __spirv_BuiltInGlobalOffset(2); - } - default: - return 0; + if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { + return (uint)__spirv_BuiltInWorkgroupId(dim) * + (uint)__spirv_BuiltInWorkgroupSize(dim) + + (uint)__spirv_BuiltInLocalInvocationId(dim) + + (uint)__spirv_BuiltInGlobalOffset(dim); } + return __spirv_BuiltInWorkgroupId(dim) * 
__spirv_BuiltInWorkgroupSize(dim) + + __spirv_BuiltInLocalInvocationId(dim) + + __spirv_BuiltInGlobalOffset(dim); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl index 708f0592487c0..d60719e81b143 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_size.cl @@ -9,14 +9,5 @@ #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalSize(int dim) { - switch (dim) { - case 0: - return __spirv_BuiltInNumWorkgroups(0) * __spirv_BuiltInWorkgroupSize(0); - case 1: - return __spirv_BuiltInNumWorkgroups(1) * __spirv_BuiltInWorkgroupSize(1); - case 2: - return __spirv_BuiltInNumWorkgroups(2) * __spirv_BuiltInWorkgroupSize(2); - default: - return 1; - } + return __spirv_BuiltInNumWorkgroups(dim) * __spirv_BuiltInWorkgroupSize(dim); } diff --git a/libclc/libspirv/lib/r600/workitem/get_global_offset.cl b/libclc/libspirv/lib/r600/workitem/get_global_offset.cl index 12c6f7fdb48a8..6b7c0eeba5358 100644 --- a/libclc/libspirv/lib/r600/workitem/get_global_offset.cl +++ b/libclc/libspirv/lib/r600/workitem/get_global_offset.cl @@ -9,25 +9,16 @@ #include _CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInGlobalOffset(int dim) { + __attribute__((address_space(7))) uint *ptr = + (__attribute__((address_space(7))) + uint *)__builtin_r600_implicitarg_ptr(); switch (dim) { - case 0: { - __attribute__((address_space(7))) uint *ptr = - (__attribute__((address_space(7))) - uint *)__builtin_r600_implicitarg_ptr(); + case 0: return ptr[1]; - } - case 1: { - __attribute__((address_space(7))) uint *ptr = - (__attribute__((address_space(7))) - uint *)__builtin_r600_implicitarg_ptr(); + case 1: return ptr[2]; - } - case 2: { - __attribute__((address_space(7))) uint *ptr = - (__attribute__((address_space(7))) - uint *)__builtin_r600_implicitarg_ptr(); + case 2: return ptr[3]; - } default: return 0; } From 
d28f0296cfdbc6d0955af51387005bbe49695203 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Fri, 18 Jul 2025 14:14:28 +0200 Subject: [PATCH 6/7] fix default value of BuiltInNumWorkgroups BuiltInWorkgroupSize --- libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl | 2 +- libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl | 2 +- libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl | 2 +- libclc/libspirv/lib/r600/workitem/get_num_groups.cl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl b/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl index ce71723dd3317..7d06f00a2064c 100644 --- a/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl +++ b/libclc/libspirv/lib/amdgcn/workitem/get_local_size.cl @@ -21,6 +21,6 @@ _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int dim) { case 2: return __clc_amdgcn_get_local_size_z(); default: - return 0; + return 1; } } diff --git a/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl b/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl index beb54a8bad199..15555b825bdf1 100644 --- a/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/amdgcn/workitem/get_num_groups.cl @@ -21,6 +21,6 @@ _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { case 2: return __clc_amdgcn_get_num_groups_z(); default: - return 0; + return 1; } } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl index b5161378c7241..273329b9f82e5 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl @@ -17,6 +17,6 @@ _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { case 2: return __nvvm_read_ptx_sreg_nctaid_z(); default: - return 0; + return 1; } } diff --git a/libclc/libspirv/lib/r600/workitem/get_num_groups.cl 
b/libclc/libspirv/lib/r600/workitem/get_num_groups.cl index fe4a80af23b98..72bc76fa9865d 100644 --- a/libclc/libspirv/lib/r600/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/r600/workitem/get_num_groups.cl @@ -21,6 +21,6 @@ _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { case 2: return __clc_r600_get_num_groups_z(); default: - return 0; + return 1; } } From 43f08908ebf92cb0933b9450b8a3c7b8b4a681d7 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 21 Jul 2025 02:15:41 +0200 Subject: [PATCH 7/7] remove GEN_p --- libdevice/nativecpu_utils.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 312fee6424f1c..747402a862a91 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -334,13 +334,11 @@ GEN_u32(__spirv_BuiltInSubgroupMaxSize, __mux_get_max_sub_group_size); GEN_u32(__spirv_BuiltInSubgroupId, __mux_get_sub_group_id); // I64_I32 -#define GEN_p(bname, muxname) \ +#define GEN_xyz(bname, muxname) \ DEVICE_EXTERN_C GET_PROPS uint64_t muxname(uint32_t); \ DEVICE_EXTERNAL GET_PROPS uint64_t bname(int dim) { return muxname(dim); } \ static_assert(true) -#define GEN_xyz(bname, ncpu_name) GEN_p(bname, ncpu_name); - GEN_xyz(__spirv_BuiltInGlobalOffset, __mux_get_global_offset); GEN_xyz(__spirv_BuiltInLocalInvocationId, __mux_get_local_id); GEN_xyz(__spirv_BuiltInNumWorkgroups, __mux_get_num_groups);