-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[AArch64][MachineCombiner] Fix setting reg state for gather lane pattern #149703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[AArch64][MachineCombiner] Fix setting reg state for gather lane pattern #149703
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Jonathan Cohen (jcohen-apple) ChangesClosing #149585 #149644 Offset register was marked Full diff: https://github.com/llvm/llvm-project/pull/149703.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bc57537ad5dfb..214eb815738af 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -7516,14 +7517,15 @@ generateGatherPattern(MachineInstr &Root,
auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
Register SrcRegister, unsigned Lane,
- Register OffsetRegister) {
+ Register OffsetRegister,
+ bool OffsetRegisterKillState) {
auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
MachineInstrBuilder LoadIndexIntoRegister =
BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
NewRegister)
.addReg(SrcRegister)
.addImm(Lane)
- .addReg(OffsetRegister, getKillRegState(true));
+ .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
InsInstrs.push_back(LoadIndexIntoRegister);
return NewRegister;
@@ -7531,7 +7533,8 @@ generateGatherPattern(MachineInstr &Root,
// Helper to create load instruction based on opcode
auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
- Register OffsetReg) -> MachineInstrBuilder {
+ Register OffsetReg,
+ bool KillState) -> MachineInstrBuilder {
unsigned Opcode;
switch (NumLanes) {
case 4:
@@ -7557,25 +7560,30 @@ generateGatherPattern(MachineInstr &Root,
auto LanesToLoadToReg0 =
llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
LoadToLaneInstrsAscending.begin() + NumLanes / 2);
- auto PrevReg = SubregToReg->getOperand(0).getReg();
+ Register PrevReg = SubregToReg->getOperand(0).getReg();
for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
+ const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
+ OffsetRegOperand.getReg(),
+ OffsetRegOperand.isKill());
DelInstrs.push_back(LoadInstr);
}
- auto LastLoadReg0 = PrevReg;
+ Register LastLoadReg0 = PrevReg;
// First load into register 1. Perform a LDRSui to zero out the upper lanes in
// a single instruction.
- auto Lane0Load = *LoadToLaneInstrsAscending.begin();
- auto OriginalSplitLoad =
+ MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
+ MachineInstr *OriginalSplitLoad =
*std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
- auto DestRegForMiddleIndex = MRI.createVirtualRegister(
+ Register DestRegForMiddleIndex = MRI.createVirtualRegister(
MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
+ const MachineOperand &OriginalSplitToLoadOffsetOperand =
+ OriginalSplitLoad->getOperand(3);
MachineInstrBuilder MiddleIndexLoadInstr =
CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
- OriginalSplitLoad->getOperand(3).getReg());
+ OriginalSplitToLoadOffsetOperand.getReg(),
+ OriginalSplitToLoadOffsetOperand.isKill());
InstrIdxForVirtReg.insert(
std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
@@ -7583,7 +7591,7 @@ generateGatherPattern(MachineInstr &Root,
DelInstrs.push_back(OriginalSplitLoad);
// Subreg To Reg instruction for register 1.
- auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
+ Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
unsigned SubregType;
switch (NumLanes) {
case 4:
@@ -7616,14 +7624,18 @@ generateGatherPattern(MachineInstr &Root,
LoadToLaneInstrsAscending.end());
PrevReg = SubRegToRegInstr->getOperand(0).getReg();
for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
+ const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
+ OffsetRegOperand.getReg(),
+ OffsetRegOperand.isKill());
+
+ // Do not add the last reg to DelInstrs - it will be removed later.
if (Index == NumLanes / 2 - 2) {
break;
}
DelInstrs.push_back(LoadInstr);
}
- auto LastLoadReg1 = PrevReg;
+ Register LastLoadReg1 = PrevReg;
// Create the final zip instruction to combine the results.
MachineInstrBuilder ZipInstr =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
index 09eb18b0e3574..5cddf92fdbb4c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
@@ -13,12 +13,12 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
- ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
- ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
- ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 1
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, [[LD_i32]], %subreg.ssub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, [[COPY2]]
; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY3]], 0
; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
- ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]]
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, [[COPY4]]
; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
; CHECK-NEXT: $q0 = COPY [[ZIP]]
; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -27,11 +27,11 @@ body: |
%2:gpr64common = COPY $x2
%3:gpr64common = COPY $x3
%4:gpr64common = COPY $x4
- %5:fpr32 = LDRSroX %0, killed %1, 0, 1
- %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
- %7:fpr128 = LD1i32 %6, 1, killed %2
- %8:fpr128 = LD1i32 %7, 2, killed %3
- %9:fpr128 = LD1i32 %8, 3, killed %4
+ %5:fpr32 = LDRSroX %0, %1, 0, 1
+ %6:fpr128 = SUBREG_TO_REG 0, %5, %subreg.ssub
+ %7:fpr128 = LD1i32 %6, 1, %2
+ %8:fpr128 = LD1i32 %7, 2, %3
+ %9:fpr128 = LD1i32 %8, 3, %4
$q0 = COPY %9
RET_ReallyLR implicit $q0
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is OK. Replacing the auto's, whilst a good change, makes the differences a little more difficult to follow. It might be worth having a test where the kill flags are different between different instructions too, to show that the right ones are removed.
As there is another issue it might be better to revert the original and recommit these together with a fix.
Closing #149585 #149644
Offset register was marked
killed
without verifying it does not have additional uses, updated to use the reg kill state set previously.