@@ -12,6 +12,7 @@ define amdgpu_kernel void @test1_s_barrier_signal(ptr addrspace(1) %out) #0 {
1212; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
1313; GCN-NEXT: s_wait_kmcnt 0x0
1414; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
15+ ; GCN-NEXT: s_wait_storecnt 0x0
1516; GCN-NEXT: s_barrier_signal -1
1617; GCN-NEXT: s_barrier_wait -1
1718; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -28,6 +29,7 @@ define amdgpu_kernel void @test1_s_barrier_signal(ptr addrspace(1) %out) #0 {
2829; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
2930; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
3031; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
32+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
3133; GLOBAL-ISEL-NEXT: s_barrier_signal -1
3234; GLOBAL-ISEL-NEXT: s_barrier_wait -1
3335; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -56,6 +58,7 @@ define amdgpu_kernel void @test2_s_barrier_signal(ptr addrspace(1) %out) #0 {
5658; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
5759; GCN-NEXT: s_wait_kmcnt 0x0
5860; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
61+ ; GCN-NEXT: s_wait_storecnt 0x0
5962; GCN-NEXT: s_barrier_signal 1
6063; GCN-NEXT: s_barrier_wait 1
6164; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -72,6 +75,7 @@ define amdgpu_kernel void @test2_s_barrier_signal(ptr addrspace(1) %out) #0 {
7275; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
7376; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
7477; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
78+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
7579; GLOBAL-ISEL-NEXT: s_barrier_signal 1
7680; GLOBAL-ISEL-NEXT: s_barrier_wait 1
7781; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -100,6 +104,7 @@ define amdgpu_kernel void @test3_s_barrier_signal(ptr addrspace(1) %out) #0 {
100104; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
101105; GCN-NEXT: s_wait_kmcnt 0x0
102106; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
107+ ; GCN-NEXT: s_wait_storecnt 0x0
103108; GCN-NEXT: s_barrier_signal 0
104109; GCN-NEXT: s_barrier_wait 0
105110; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -116,6 +121,7 @@ define amdgpu_kernel void @test3_s_barrier_signal(ptr addrspace(1) %out) #0 {
116121; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
117122; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
118123; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
124+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
119125; GLOBAL-ISEL-NEXT: s_barrier_signal 0
120126; GLOBAL-ISEL-NEXT: s_barrier_wait 0
121127; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -146,6 +152,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_var(ptr addrspace(1) %out) #0
146152; GCN-NEXT: v_sub_nc_u32_e32 v0, v2, v0
147153; GCN-NEXT: s_wait_kmcnt 0x0
148154; GCN-NEXT: global_store_b32 v3, v1, s[0:1]
155+ ; GCN-NEXT: s_wait_storecnt 0x0
149156; GCN-NEXT: s_barrier_signal m0
150157; GCN-NEXT: s_barrier_wait 1
151158; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -163,6 +170,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_var(ptr addrspace(1) %out) #0
163170; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
164171; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
165172; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
173+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
166174; GLOBAL-ISEL-NEXT: s_barrier_signal m0
167175; GLOBAL-ISEL-NEXT: s_barrier_wait 1
168176; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -192,6 +200,7 @@ define void @test2_s_barrier_signal_var(i32 %arg) {
192200; GCN-NEXT: v_readfirstlane_b32 s0, v0
193201; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
194202; GCN-NEXT: s_mov_b32 m0, s0
203+ ; GCN-NEXT: s_wait_storecnt 0x0
195204; GCN-NEXT: s_barrier_signal m0
196205; GCN-NEXT: s_setpc_b64 s[30:31]
197206;
@@ -203,6 +212,7 @@ define void @test2_s_barrier_signal_var(i32 %arg) {
203212; GLOBAL-ISEL-NEXT: s_wait_bvhcnt 0x0
204213; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
205214; GLOBAL-ISEL-NEXT: v_readfirstlane_b32 m0, v0
215+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
206216; GLOBAL-ISEL-NEXT: s_barrier_signal m0
207217; GLOBAL-ISEL-NEXT: s_setpc_b64 s[30:31]
208218 call void @llvm.amdgcn.s.barrier.signal.var (i32 %arg )
@@ -216,6 +226,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
216226; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
217227; GCN-NEXT: s_wait_kmcnt 0x0
218228; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
229+ ; GCN-NEXT: s_wait_storecnt 0x0
219230; GCN-NEXT: s_barrier_signal_isfirst -1
220231; GCN-NEXT: s_cselect_b32 s3, s3, s5
221232; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -235,6 +246,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
235246; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
236247; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
237248; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
249+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
238250; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst -1
239251; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
240252; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -270,6 +282,7 @@ define amdgpu_kernel void @test2_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
270282; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
271283; GCN-NEXT: s_wait_kmcnt 0x0
272284; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
285+ ; GCN-NEXT: s_wait_storecnt 0x0
273286; GCN-NEXT: s_barrier_signal_isfirst 1
274287; GCN-NEXT: s_cselect_b32 s3, s3, s5
275288; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -289,6 +302,7 @@ define amdgpu_kernel void @test2_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
289302; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
290303; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
291304; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
305+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
292306; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst 1
293307; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
294308; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -324,6 +338,7 @@ define amdgpu_kernel void @test3_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
324338; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
325339; GCN-NEXT: s_wait_kmcnt 0x0
326340; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
341+ ; GCN-NEXT: s_wait_storecnt 0x0
327342; GCN-NEXT: s_barrier_signal_isfirst 1
328343; GCN-NEXT: s_cselect_b32 s3, s3, s5
329344; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -343,6 +358,7 @@ define amdgpu_kernel void @test3_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
343358; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
344359; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
345360; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
361+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
346362; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst 1
347363; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
348364; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -379,6 +395,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst_var(ptr addrspace(1) %
379395; GCN-NEXT: s_mov_b32 m0, 1
380396; GCN-NEXT: s_wait_kmcnt 0x0
381397; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
398+ ; GCN-NEXT: s_wait_storecnt 0x0
382399; GCN-NEXT: s_barrier_signal_isfirst m0
383400; GCN-NEXT: s_cselect_b32 s3, s3, s5
384401; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -399,6 +416,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst_var(ptr addrspace(1) %
399416; GLOBAL-ISEL-NEXT: s_mov_b32 m0, 1
400417; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
401418; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
419+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
402420; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst m0
403421; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
404422; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -444,6 +462,7 @@ define void @test2_s_barrier_signal_isfirst_var(ptr addrspace(1) %a, ptr addrspa
444462; GCN-NEXT: v_add_co_u32 v7, vcc_lo, v7, v9
445463; GCN-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo
446464; GCN-NEXT: global_store_b32 v[7:8], v10, off
465+ ; GCN-NEXT: s_wait_storecnt 0x0
447466; GCN-NEXT: s_barrier_signal_isfirst m0
448467; GCN-NEXT: s_cselect_b32 vcc_lo, -1, 0
449468; GCN-NEXT: v_dual_cndmask_b32 v2, v4, v2 :: v_dual_cndmask_b32 v3, v5, v3
@@ -470,6 +489,7 @@ define void @test2_s_barrier_signal_isfirst_var(ptr addrspace(1) %a, ptr addrspa
470489; GLOBAL-ISEL-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo
471490; GLOBAL-ISEL-NEXT: v_mov_b32_e32 v9, 0
472491; GLOBAL-ISEL-NEXT: global_store_b32 v[7:8], v9, off
492+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
473493; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst m0
474494; GLOBAL-ISEL-NEXT: s_cselect_b32 s0, 1, 0
475495; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -1339,6 +1359,7 @@ define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 {
13391359; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
13401360; GCN-NEXT: s_wait_kmcnt 0x0
13411361; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
1362+ ; GCN-NEXT: s_wait_storecnt 0x0
13421363; GCN-NEXT: s_barrier_signal -1
13431364; GCN-NEXT: s_barrier_wait -1
13441365; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -1355,6 +1376,7 @@ define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 {
13551376; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
13561377; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
13571378; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
1379+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
13581380; GLOBAL-ISEL-NEXT: s_barrier_signal -1
13591381; GLOBAL-ISEL-NEXT: s_barrier_wait -1
13601382; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
0 commit comments