[GISel] Handle Flags in G_PTR_ADD Combines #152495
Conversation
So far, GlobalISel's G_PTR_ADD combines have ignored MIFlags like nuw, nusw, and inbounds. That was in many cases unnecessarily conservative and in others unsound, since reassociations re-used the existing G_PTR_ADD instructions without invalidating their flags. This patch aims to improve that.

I've checked the transforms in this PR with Alive2 on corresponding middle-end IR constructs.

A longer-term goal would be to encapsulate the logic that determines which GEP/ISD::PTRADD/G_PTR_ADD flags can be preserved in which case, since this logic occurs in similar forms in the middle end, the SelectionDAG combines, and the GlobalISel combines here.

For SWDEV-516125.
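For illustration, here is a minimal sketch of the flag-preservation rule the immediate-chain and constant-folding combines apply when merging two G_PTR_ADDs into one. The helper name combinePtrAddChainFlags is made up for this sketch; the patch inlines the equivalent logic in each match function in CombinerHelper.cpp.

    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Sketch only, not part of the patch: merge the MIFlags of two chained
    // G_PTR_ADDs that are folded into a single G_PTR_ADD.
    static unsigned combinePtrAddChainFlags(unsigned OuterFlags,
                                            unsigned InnerFlags) {
      // Only flags set on both original G_PTR_ADDs may survive the combine.
      unsigned Common = OuterFlags & InnerFlags;
      unsigned Flags = 0;
      // Reassociating nuw additions preserves nuw.
      if (Common & MachineInstr::MIFlag::NoUWrap)
        Flags |= MachineInstr::MIFlag::NoUWrap;
      // If both G_PTR_ADDs are inbounds, reaching the same result in one
      // G_PTR_ADD is also inbounds; nusw follows because the folded immediate
      // cannot exceed the maximum size of an allocated object.
      if (Common & MachineInstr::MIFlag::InBounds)
        Flags |= MachineInstr::MIFlag::InBounds | MachineInstr::MIFlag::NoUSWrap;
      return Flags;
    }

The reassociation combines in the patch use per-case variants of this rule (for example, the inner-RHS case only needs the outer G_PTR_ADD's nusw/inbounds once nuw is common, because the inner operation is a plain G_ADD), which is one reason the logic is kept local to each match function for now.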
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel

Author: Fabian Ritter (ritter-x2a)

Full diff: https://github.com/llvm/llvm-project/pull/152495.diff

4 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index da829046cc421..15a5f82ac0567 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -66,6 +66,7 @@ struct PtrAddChain {
int64_t Imm;
Register Base;
const RegisterBank *Bank;
+ unsigned Flags;
};
struct RegisterImmPair {
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e84ba91c47c8b..8163dea4e31c0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1821,10 +1821,29 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
return false;
}
+ // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+ // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
+ // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
+ // largest signed integer that fits into the index type, which is the maximum
+ // size of allocated objects according to the IR Language Reference.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned LHSPtrAddFlags = Add2Def->getFlags();
+ bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsInBounds =
+ PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsInBounds) {
+ Flags |= MachineInstr::MIFlag::InBounds;
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ }
+
// Pass the combined immediate to the apply function.
MatchInfo.Imm = AMNew.BaseOffs;
MatchInfo.Base = Base;
MatchInfo.Bank = getRegBank(Imm2);
+ MatchInfo.Flags = Flags;
return true;
}
@@ -1838,6 +1857,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
Observer.changingInstr(MI);
MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewOffset.getReg(0));
+ MI.setFlags(MatchInfo.Flags);
Observer.changedInstr(MI);
}
@@ -4871,14 +4891,34 @@ bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
if (!C2)
return false;
+ // If both additions are nuw, the reassociated additions are also nuw.
+ // If the original G_PTR_ADD is additionally nusw, X and C are both not
+ // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
+ // therefore also nusw.
+ // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
+ // the new G_PTR_ADDs are then also inbounds.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned AddFlags = RHS->getFlags();
+ bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
+ bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsNoUSWrap)
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ if (IsInBounds)
+ Flags |= MachineInstr::MIFlag::InBounds;
+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
auto NewBase =
- Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(NewBase.getReg(0));
MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ MI.setFlags(Flags);
Observer.changedInstr(MI);
};
return !reassociationCanBreakAddressingModePattern(MI);
@@ -4897,6 +4937,25 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
return false;
auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+
+ // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+ // nuw and inbounds (which implies nusw), the offsets are both non-negative,
+ // so the new G_PTR_ADDs are also inbounds.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
+ bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
+ MachineInstr::MIFlag::NoUSWrap);
+ bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
+ MachineInstr::MIFlag::InBounds);
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsNoUSWrap)
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ if (IsInBounds)
+ Flags |= MachineInstr::MIFlag::InBounds;
+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
// When we change LHSPtrAdd's offset register we might cause it to use a reg
// before its def. Sink the instruction so the outer PTR_ADD to ensure this
@@ -4907,9 +4966,11 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
Observer.changingInstr(MI);
MI.getOperand(2).setReg(NewCst.getReg(0));
+ MI.setFlags(Flags);
Observer.changedInstr(MI);
Observer.changingInstr(*LHSPtrAdd);
LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ LHSPtrAdd->setFlags(Flags);
Observer.changedInstr(*LHSPtrAdd);
};
return !reassociationCanBreakAddressingModePattern(MI);
@@ -4933,11 +4994,30 @@ bool CombinerHelper::matchReassocFoldConstantsInSubTree(
if (!C2)
return false;
+ // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+ // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
+ // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
+ // largest signed integer that fits into the index type, which is the maximum
+ // size of allocated objects according to the IR Language Reference.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
+ bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsInBounds =
+ PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsInBounds) {
+ Flags |= MachineInstr::MIFlag::InBounds;
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ }
+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(LHSSrc1);
MI.getOperand(2).setReg(NewCst.getReg(0));
+ MI.setFlags(Flags);
Observer.changedInstr(MI);
};
return !reassociationCanBreakAddressingModePattern(MI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
index 544cf0f6870aa..764f1c06a9b93 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
@@ -320,3 +320,245 @@ body: |
RET_ReallyLR
...
+---
+name: rhs_add_nuw
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: rhs_add_nuw
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = nuw G_ADD %1, %2
+ %4:_(p0) = nuw G_PTR_ADD %0, %3(s64)
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: rhs_add_nuw_nusw
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: rhs_add_nuw_nusw
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = nuw G_ADD %1, %2
+ %4:_(p0) = nuw nusw G_PTR_ADD %0, %3(s64)
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: rhs_add_nuw_inbounds
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: rhs_add_nuw_inbounds
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = nuw G_ADD %1, %2
+ %4:_(p0) = nuw nusw inbounds G_PTR_ADD %0, %3(s64)
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: reassoc_nuw
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: reassoc_nuw
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(p0) = nuw G_PTR_ADD %0, %2(s64)
+ %4:_(p0) = nuw G_PTR_ADD %3(p0), %1
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: reassoc_nuw_nusw
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: reassoc_nuw_nusw
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(p0) = nuw nusw G_PTR_ADD %0, %2(s64)
+ %4:_(p0) = nuw nusw G_PTR_ADD %3(p0), %1
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: reassoc_nuw_inbounds
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: reassoc_nuw_inbounds
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(p0) = nuw nusw inbounds G_PTR_ADD %0, %2(s64)
+ %4:_(p0) = nuw nusw inbounds G_PTR_ADD %3(p0), %1
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: combine_offsets_nuw
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: combine_offsets_nuw
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 11
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = G_CONSTANT i64 10
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(p0) = nuw G_PTR_ADD %0, %1
+ %4:_(p0) = nuw G_PTR_ADD %3(p0), %2
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: combine_offsets_nuw_inbounds
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: combine_offsets_nuw_inbounds
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 11
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = G_CONSTANT i64 10
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(p0) = nuw nusw inbounds G_PTR_ADD %0, %1
+ %4:_(p0) = nuw nusw inbounds G_PTR_ADD %3(p0), %2
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combine-ptr-add-chain.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combine-ptr-add-chain.mir
index 4ed9322802693..cfce267f40096 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combine-ptr-add-chain.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combine-ptr-add-chain.mir
@@ -27,3 +27,51 @@ body: |
$x0 = COPY %4(p0)
RET_ReallyLR implicit $x0
...
+---
+name: ptradd_chain_nuw
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: ptradd_chain_nuw
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = G_CONSTANT i64 4
+ %2:_(s64) = G_CONSTANT i64 12
+ %3:_(p0) = nuw G_PTR_ADD %0(p0), %1
+ %4:_(p0) = nuw G_PTR_ADD %3(p0), %2
+ $x0 = COPY %4(p0)
+ RET_ReallyLR implicit $x0
+...
+---
+name: ptradd_chain_nuw_inbounds
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: ptradd_chain_nuw_inbounds
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = G_CONSTANT i64 4
+ %2:_(s64) = G_CONSTANT i64 12
+ %3:_(p0) = nuw nusw inbounds G_PTR_ADD %0(p0), %1
+ %4:_(p0) = nuw nusw inbounds G_PTR_ADD %3(p0), %2
+ $x0 = COPY %4(p0)
+ RET_ReallyLR implicit $x0
+...