Skip to content

Commit a5deb59

Browse files
authored
[AMDGPU] Add NoaliasAddrSpace to AAMDnodes (#149247)
This is a follow-up to #136553, which calculates NoaliasAddrSpace. This PR carries the calculated info into MIR by adding it to AAMDNodes.
1 parent 28c2c1e commit a5deb59

File tree

12 files changed

+133
-15
lines changed

12 files changed

+133
-15
lines changed

llvm/include/llvm/IR/Metadata.h

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -759,18 +759,18 @@ class MDString : public Metadata {
759759
/// memory access used by the alias-analysis infrastructure.
760760
struct AAMDNodes {
761761
explicit AAMDNodes() = default;
762-
explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N)
763-
: TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N) {}
762+
explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N, MDNode *NAS)
763+
: TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N), NoAliasAddrSpace(NAS) {}
764764

765765
bool operator==(const AAMDNodes &A) const {
766766
return TBAA == A.TBAA && TBAAStruct == A.TBAAStruct && Scope == A.Scope &&
767-
NoAlias == A.NoAlias;
767+
NoAlias == A.NoAlias && NoAliasAddrSpace == A.NoAliasAddrSpace;
768768
}
769769

770770
bool operator!=(const AAMDNodes &A) const { return !(*this == A); }
771771

772772
explicit operator bool() const {
773-
return TBAA || TBAAStruct || Scope || NoAlias;
773+
return TBAA || TBAAStruct || Scope || NoAlias || NoAliasAddrSpace;
774774
}
775775

776776
/// The tag for type-based alias analysis.
@@ -785,6 +785,9 @@ struct AAMDNodes {
785785
/// The tag specifying the noalias scope.
786786
MDNode *NoAlias = nullptr;
787787

788+
/// The tag specifying the noalias address spaces.
789+
MDNode *NoAliasAddrSpace = nullptr;
790+
788791
// Shift tbaa Metadata node to start off bytes later
789792
LLVM_ABI static MDNode *shiftTBAA(MDNode *M, size_t off);
790793

@@ -806,6 +809,8 @@ struct AAMDNodes {
806809
Result.TBAAStruct = Other.TBAAStruct == TBAAStruct ? TBAAStruct : nullptr;
807810
Result.Scope = Other.Scope == Scope ? Scope : nullptr;
808811
Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
812+
Result.NoAliasAddrSpace =
813+
Other.NoAliasAddrSpace == NoAliasAddrSpace ? NoAliasAddrSpace : nullptr;
809814
return Result;
810815
}
811816

@@ -818,6 +823,7 @@ struct AAMDNodes {
818823
TBAAStruct ? shiftTBAAStruct(TBAAStruct, Offset) : nullptr;
819824
Result.Scope = Scope;
820825
Result.NoAlias = NoAlias;
826+
Result.NoAliasAddrSpace = NoAliasAddrSpace;
821827
return Result;
822828
}
823829

@@ -833,6 +839,7 @@ struct AAMDNodes {
833839
Result.TBAAStruct = TBAAStruct;
834840
Result.Scope = Scope;
835841
Result.NoAlias = NoAlias;
842+
Result.NoAliasAddrSpace = NoAliasAddrSpace;
836843
return Result;
837844
}
838845

@@ -860,20 +867,21 @@ struct AAMDNodes {
860867
template<>
861868
struct DenseMapInfo<AAMDNodes> {
862869
static inline AAMDNodes getEmptyKey() {
863-
return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(),
864-
nullptr, nullptr, nullptr);
870+
return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(), nullptr, nullptr,
871+
nullptr, nullptr);
865872
}
866873

867874
static inline AAMDNodes getTombstoneKey() {
868-
return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(),
875+
return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(), nullptr,
869876
nullptr, nullptr, nullptr);
870877
}
871878

872879
static unsigned getHashValue(const AAMDNodes &Val) {
873880
return DenseMapInfo<MDNode *>::getHashValue(Val.TBAA) ^
874881
DenseMapInfo<MDNode *>::getHashValue(Val.TBAAStruct) ^
875882
DenseMapInfo<MDNode *>::getHashValue(Val.Scope) ^
876-
DenseMapInfo<MDNode *>::getHashValue(Val.NoAlias);
883+
DenseMapInfo<MDNode *>::getHashValue(Val.NoAlias) ^
884+
DenseMapInfo<MDNode *>::getHashValue(Val.NoAliasAddrSpace);
877885
}
878886

879887
static bool isEqual(const AAMDNodes &LHS, const AAMDNodes &RHS) {

llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,8 @@ AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
525525
Result.TBAAStruct = nullptr;
526526
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
527527
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
528+
Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
529+
NoAliasAddrSpace, Other.NoAliasAddrSpace);
528530
return Result;
529531
}
530532

@@ -533,6 +535,8 @@ AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
533535
Result.TBAA = Result.TBAAStruct = nullptr;
534536
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
535537
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
538+
Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
539+
NoAliasAddrSpace, Other.NoAliasAddrSpace);
536540
return Result;
537541
}
538542

llvm/lib/CodeGen/MIRParser/MILexer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
616616
.Case("!range", MIToken::md_range)
617617
.Case("!DIExpression", MIToken::md_diexpr)
618618
.Case("!DILocation", MIToken::md_dilocation)
619+
.Case("!noalias.addrspace", MIToken::md_noalias_addrspace)
619620
.Default(MIToken::Error);
620621
}
621622

llvm/lib/CodeGen/MIRParser/MILexer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ struct MIToken {
151151
md_tbaa,
152152
md_alias_scope,
153153
md_noalias,
154+
md_noalias_addrspace,
154155
md_range,
155156
md_diexpr,
156157
md_dilocation,

llvm/lib/CodeGen/MIRParser/MIParser.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3482,6 +3482,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
34823482
if (parseMDNode(AAInfo.NoAlias))
34833483
return true;
34843484
break;
3485+
case MIToken::md_noalias_addrspace:
3486+
lex();
3487+
if (parseMDNode(AAInfo.NoAliasAddrSpace))
3488+
return true;
3489+
break;
34853490
case MIToken::md_range:
34863491
lex();
34873492
if (parseMDNode(Range))
@@ -3490,7 +3495,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
34903495
// TODO: Report an error on duplicate metadata nodes.
34913496
default:
34923497
return error("expected 'align' or '!tbaa' or '!alias.scope' or "
3493-
"'!noalias' or '!range'");
3498+
"'!noalias' or '!range' or '!noalias.addrspace'");
34943499
}
34953500
}
34963501
if (expectAndConsume(MIToken::rparen))

llvm/lib/CodeGen/MachineOperand.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
12731273
OS << ", !noalias ";
12741274
AAInfo.NoAlias->printAsOperand(OS, MST);
12751275
}
1276+
if (AAInfo.NoAliasAddrSpace) {
1277+
OS << ", !noalias.addrspace ";
1278+
AAInfo.NoAliasAddrSpace->printAsOperand(OS, MST);
1279+
}
12761280
if (getRanges()) {
12771281
OS << ", !range ";
12781282
getRanges()->printAsOperand(OS, MST);

llvm/lib/IR/Metadata.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,6 +1796,7 @@ AAMDNodes Instruction::getAAMetadata() const {
17961796
Result.TBAAStruct = Info.lookup(LLVMContext::MD_tbaa_struct);
17971797
Result.Scope = Info.lookup(LLVMContext::MD_alias_scope);
17981798
Result.NoAlias = Info.lookup(LLVMContext::MD_noalias);
1799+
Result.NoAliasAddrSpace = Info.lookup(LLVMContext::MD_noalias_addrspace);
17991800
}
18001801
return Result;
18011802
}
@@ -1805,6 +1806,7 @@ void Instruction::setAAMetadata(const AAMDNodes &N) {
18051806
setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct);
18061807
setMetadata(LLVMContext::MD_alias_scope, N.Scope);
18071808
setMetadata(LLVMContext::MD_noalias, N.NoAlias);
1809+
setMetadata(LLVMContext::MD_noalias_addrspace, N.NoAliasAddrSpace);
18081810
}
18091811

18101812
void Instruction::setNoSanitizeMetadata() {

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
1212
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1313
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
1414
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
15-
; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
15+
; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
1616
; GFX942-NEXT: S_ENDPGM 0
1717
;
1818
; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
@@ -23,7 +23,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
2323
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2424
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
2525
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
26-
; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
26+
; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
2727
; GFX11-NEXT: S_ENDPGM 0
2828
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)
2929
ret void
@@ -38,7 +38,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
3838
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3939
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
4040
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
41-
; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
41+
; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
4242
; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
4343
; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
4444
;
@@ -50,7 +50,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
5050
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
5151
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
5252
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
53-
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
53+
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
5454
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
5555
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
5656
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
1313
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1414
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1515
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
16-
; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
16+
; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0)
1717
; GFX90A_GFX942-NEXT: S_ENDPGM 0
1818
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1919
ret void
@@ -30,7 +30,7 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
3030
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3131
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3232
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
33-
; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
33+
; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0)
3434
; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
3535
; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
3636
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: not llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
3+
4+
--- |
5+
define void @expect_id(ptr %ptr, float %data) #0 {
6+
%1 = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0
7+
ret void
8+
}
9+
10+
attributes #0 = { "target-cpu"="gfx1200" }
11+
12+
!0 = !{i32 5, i32 6}
13+
...
14+
15+
---
16+
name: expect_id
17+
18+
body: |
19+
bb.1 (%ir-block.0):
20+
liveins: $vgpr0, $vgpr1, $vgpr2
21+
22+
; CHECK: expected metadata id after '!'
23+
%2:vgpr_32 = COPY $vgpr0
24+
%3:vgpr_32 = COPY $vgpr1
25+
%0:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
26+
%1:vgpr_32 = COPY $vgpr2
27+
FLAT_ATOMIC_ADD_F32 %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !!)
28+
S_ENDPGM 0
29+
...

0 commit comments

Comments
 (0)