-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[PowerPC] Add DMR and WACC COPY support #149129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns.
@llvm/pr-subscribers-backend-powerpc Author: Maryam Moghadas (maryammo) ChangesThis patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns. Full diff: https://github.com/llvm/llvm-project/pull/149129.diff 7 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 3808a26a0b92a..a5e1522753c8b 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -50,7 +50,7 @@ add_llvm_target(PowerPCCodeGen
PPCTargetTransformInfo.cpp
PPCTOCRegDeps.cpp
PPCTLSDynamicCall.cpp
- PPCVSXCopy.cpp
+ PPCVSXWACCCopy.cpp
PPCReduceCRLogicals.cpp
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 124dac4584312..a8f0f215ebee5 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -39,7 +39,7 @@ class ModulePass;
FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM);
FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
- FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXWACCCopyPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCReduceCRLogicalsPass();
@@ -64,7 +64,7 @@ class ModulePass;
void initializePPCLoopInstrFormPrepPass(PassRegistry&);
void initializePPCTOCRegDepsPass(PassRegistry&);
void initializePPCEarlyReturnPass(PassRegistry&);
- void initializePPCVSXCopyPass(PassRegistry&);
+ void initializePPCVSXWACCCopyPass(PassRegistry&);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCVSXSwapRemovalPass(PassRegistry&);
void initializePPCReduceCRLogicalsPass(PassRegistry&);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7c1550e99bae1..7cb7e05b55ca0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -1863,6 +1864,48 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcRegSub1)
.addReg(SrcRegSub1, getKillRegState(KillSrc));
return;
+ } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
+ PPC::WACC_HIRCRegClass.contains(DestReg)) &&
+ (PPC::WACCRCRegClass.contains(SrcReg) ||
+ PPC::WACC_HIRCRegClass.contains(SrcReg))) {
+
+ Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
+ : PPC::DMXXEXTFDMR512_HI;
+
+ RegScavenger RS;
+ RS.enterBasicBlockEnd(MBB);
+ RS.backward(std::next(I));
+
+ Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ RS.setRegUsed(TmpReg1);
+ Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ BuildMI(MBB, I, DL, get(Opc))
+ .addReg(TmpReg1, RegState::Define)
+ .addReg(TmpReg2, RegState::Define)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
+ : PPC::DMXXINSTDMR512_HI;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(TmpReg1, RegState::Kill)
+ .addReg(TmpReg2, RegState::Kill);
+
+ return;
+ } else if (PPC::DMRRCRegClass.contains(DestReg) &&
+ PPC::DMRRCRegClass.contains(SrcReg)) {
+
+ BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ return;
+
} else
llvm_unreachable("Impossible reg-to-reg copy");
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index b5c6ac111dff0..ae92d5eab20cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -129,7 +129,7 @@ LLVMInitializePowerPCTarget() {
initializePPCLoopInstrFormPrepPass(PR);
initializePPCTOCRegDepsPass(PR);
initializePPCEarlyReturnPass(PR);
- initializePPCVSXCopyPass(PR);
+ initializePPCVSXWACCCopyPass(PR);
initializePPCVSXFMAMutatePass(PR);
initializePPCVSXSwapRemovalPass(PR);
initializePPCReduceCRLogicalsPass(PR);
@@ -528,7 +528,7 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
- addPass(createPPCVSXCopyPass());
+ addPass(createPPCVSXWACCCopyPass());
return false;
}
diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
similarity index 76%
rename from llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
rename to llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
index 794095cd43769..044c945fc2049 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
@@ -1,4 +1,4 @@
-//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
+//===-------------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
//
// A pass which deals with the complexity of generating legal VSX register
// copies to/from register classes which partially overlap with the VSX
-// register file.
+// register file and combines the wacc/wacc_hi copies when needed.
//
//===----------------------------------------------------------------------===//
@@ -29,12 +29,12 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-copy"
namespace {
- // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers
// (Altivec and scalar floating-point registers), we need to transform the
// copies into subregister copies with other restrictions.
- struct PPCVSXCopy : public MachineFunctionPass {
+ struct PPCVSXWACCCopy : public MachineFunctionPass {
static char ID;
- PPCVSXCopy() : MachineFunctionPass(ID) {}
+ PPCVSXWACCCopy() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
@@ -122,6 +122,33 @@ namespace {
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
SrcMO.setSubReg(PPC::sub_64);
+ } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
+ IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
+ // Matches the pattern:
+ // %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
+ // %c:wacc_hirc = COPY %a:waccrc
+ // And replaces it with:
+ // %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg());
+ if (!DefMI || !DefMI->isCopy())
+ continue;
+
+ MachineOperand &OrigSrc = DefMI->getOperand(1);
+
+ if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI))
+ continue;
+
+ if (OrigSrc.getSubReg() != PPC::sub_wacc_hi)
+ continue;
+
+ // Rewrite the second copy to use the original register's subreg
+ SrcMO.setReg(OrigSrc.getReg());
+ SrcMO.setSubReg(PPC::sub_wacc_hi);
+ Changed = true;
+
+ // Remove the intermediate copy if safe
+ if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg()))
+ DefMI->eraseFromParent();
}
}
@@ -151,9 +178,9 @@ namespace {
};
} // end anonymous namespace
-INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE,
"PowerPC VSX Copy Legalization", false, false)
-char PPCVSXCopy::ID = 0;
+char PPCVSXWACCCopy::ID = 0;
FunctionPass*
-llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); }
diff --git a/llvm/test/CodeGen/PowerPC/dmr-copy.ll b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
new file mode 100644
index 0000000000000..d5a24309f94d5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
+; CHECK-LABEL: test_wacc_copy:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: std r31, -8(r1)
+; CHECK-NEXT: std r30, -16(r1)
+; CHECK-NEXT: mr r30, r1
+; CHECK-NEXT: clrldi r0, r1, 57
+; CHECK-NEXT: subfic r0, r0, -384
+; CHECK-NEXT: stdux r1, r1, r0
+; CHECK-NEXT: .cfi_def_cfa_register r30
+; CHECK-NEXT: .cfi_offset r31, -8
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: mr r31, r1
+; CHECK-NEXT: std r3, 360(r31)
+; CHECK-NEXT: std r4, 352(r31)
+; CHECK-NEXT: stxv v2, 336(r31)
+; CHECK-NEXT: std r7, 328(r31)
+; CHECK-NEXT: ld r3, 360(r31)
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: ld r3, 352(r31)
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: stxv v2, 112(r31)
+; CHECK-NEXT: stxv v3, 96(r31)
+; CHECK-NEXT: lxv v2, 112(r31)
+; CHECK-NEXT: lxv v3, 96(r31)
+; CHECK-NEXT: lxv vs0, 336(r31)
+; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: lxvp vsp34, 128(r31)
+; CHECK-NEXT: lxvp vsp36, 160(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 192(r31)
+; CHECK-NEXT: lxvp vsp36, 224(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: ld r3, 328(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r3)
+; CHECK-NEXT: stxvp vsp36, 64(r3)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r3)
+; CHECK-NEXT: stxvp vsp36, 0(r3)
+; CHECK-NEXT: mr r1, r30
+; CHECK-NEXT: ld r31, -8(r1)
+; CHECK-NEXT: ld r30, -16(r1)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_wacc_copy:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: std r31, -8(r1)
+; CHECK-BE-NEXT: std r30, -16(r1)
+; CHECK-BE-NEXT: mr r30, r1
+; CHECK-BE-NEXT: clrldi r0, r1, 57
+; CHECK-BE-NEXT: subfic r0, r0, -384
+; CHECK-BE-NEXT: stdux r1, r1, r0
+; CHECK-BE-NEXT: mr r31, r1
+; CHECK-BE-NEXT: std r3, 360(r31)
+; CHECK-BE-NEXT: std r4, 352(r31)
+; CHECK-BE-NEXT: stxv v2, 336(r31)
+; CHECK-BE-NEXT: std r5, 328(r31)
+; CHECK-BE-NEXT: ld r3, 360(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: ld r3, 352(r31)
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: stxv v3, 112(r31)
+; CHECK-BE-NEXT: stxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v3, 112(r31)
+; CHECK-BE-NEXT: lxv vs0, 336(r31)
+; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 224(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 192(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 160(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 128(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: ld r3, 328(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r3)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r3)
+; CHECK-BE-NEXT: mr r1, r30
+; CHECK-BE-NEXT: ld r31, -8(r1)
+; CHECK-BE-NEXT: ld r30, -16(r1)
+; CHECK-BE-NEXT: blr
+entry:
+ %vdmrp.addr = alloca ptr, align 8
+ %vpp.addr = alloca ptr, align 8
+ %vc.addr = alloca <16 x i8>, align 16
+ %resp.addr = alloca ptr, align 8
+ %vdmr = alloca <1024 x i1>, align 128
+ %vp = alloca <256 x i1>, align 32
+ store ptr %vdmrp, ptr %vdmrp.addr, align 8
+ store ptr %vpp, ptr %vpp.addr, align 8
+ store <16 x i8> %vc, ptr %vc.addr, align 16
+ store ptr %resp, ptr %resp.addr, align 8
+ %0 = load ptr, ptr %vdmrp.addr, align 8
+ %1 = load <1024 x i1>, ptr %0, align 128
+ store <1024 x i1> %1, ptr %vdmr, align 128
+ %2 = load ptr, ptr %vpp.addr, align 8
+ %3 = load <256 x i1>, ptr %2, align 32
+ store <256 x i1> %3, ptr %vp, align 32
+ %4 = load <256 x i1>, ptr %vp, align 32
+ %5 = load <16 x i8>, ptr %vc.addr, align 16
+ %6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
+ store <1024 x i1> %6, ptr %vdmr, align 128
+ %7 = load <1024 x i1>, ptr %vdmr, align 128
+ %8 = load ptr, ptr %resp.addr, align 8
+ store <1024 x i1> %7, ptr %8, align 128
+ ret void
+}
+
+define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmmr dmr2, dmr0
+; CHECK-NEXT: dmxor dmr2, dmr1
+; CHECK-NEXT: lxvp vsp34, 0(r4)
+; CHECK-NEXT: lxvp vsp36, 32(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r4)
+; CHECK-NEXT: lxvp vsp36, 96(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmxor dmr0, dmr1
+; CHECK-NEXT: dmmr dmr1, dmr2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: dmmr dmr0, dmr0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmmr dmr2, dmr0
+; CHECK-BE-NEXT: dmxor dmr2, dmr1
+; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxor dmr0, dmr1
+; CHECK-BE-NEXT: dmmr dmr1, dmr2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: dmmr dmr0, dmr0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %1 = load <1024 x i1>, ptr %p1, align 128
+ %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %1)
+ %3 = load <1024 x i1>, ptr %p2, align 128
+ %4 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %3)
+ %5 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %2)
+ store <1024 x i1> %5, ptr %res1, align 128
+ %6 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %4)
+ store <1024 x i1> %6, ptr %res2, align 128
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }
+
+
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
index ea3615cee392a..8ab54156a8af2 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
@@ -93,7 +93,7 @@ static_library("LLVMPowerPCCodeGen") {
"PPCTargetMachine.cpp",
"PPCTargetObjectFile.cpp",
"PPCTargetTransformInfo.cpp",
- "PPCVSXCopy.cpp",
+ "PPCVSXWACCCopy.cpp",
"PPCVSXFMAMutate.cpp",
"PPCVSXSwapRemoval.cpp",
]
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions h,cpp -- llvm/lib/Target/PowerPC/PPC.h llvm/lib/Target/PowerPC/PPCInstrInfo.cpp llvm/lib/Target/PowerPC/PPCTargetMachine.cpp llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp View the diff from clang-format here.diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index a8f0f215e..a7cd5cde1 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -64,7 +64,7 @@ class ModulePass;
void initializePPCLoopInstrFormPrepPass(PassRegistry&);
void initializePPCTOCRegDepsPass(PassRegistry&);
void initializePPCEarlyReturnPass(PassRegistry&);
- void initializePPCVSXWACCCopyPass(PassRegistry&);
+ void initializePPCVSXWACCCopyPass(PassRegistry &);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCVSXSwapRemovalPass(PassRegistry&);
void initializePPCReduceCRLogicalsPass(PassRegistry&);
diff --git a/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
index 044c945fc..c5f711a8f 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
@@ -1,4 +1,5 @@
-//===-------------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization ----------------===//
+//===-------------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization
+//----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -29,158 +30,154 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-copy"
namespace {
- // PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers
- // (Altivec and scalar floating-point registers), we need to transform the
- // copies into subregister copies with other restrictions.
- struct PPCVSXWACCCopy : public MachineFunctionPass {
- static char ID;
- PPCVSXWACCCopy() : MachineFunctionPass(ID) {}
-
- const TargetInstrInfo *TII;
-
- bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
- MachineRegisterInfo &MRI) {
- if (Register::isVirtualRegister(Reg)) {
- return RC->hasSubClassEq(MRI.getRegClass(Reg));
- } else if (RC->contains(Reg)) {
- return true;
- }
-
- return false;
+// PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers
+// (Altivec and scalar floating-point registers), we need to transform the
+// copies into subregister copies with other restrictions.
+struct PPCVSXWACCCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXWACCCopy() : MachineFunctionPass(ID) {}
+
+ const TargetInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (Register::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
}
- bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
- }
+ return false;
+ }
- bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
- }
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
- bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
- }
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
- bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
- }
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
- bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
- }
+ bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+ }
+
+ bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
+ }
protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI.getOperand(0);
+ MachineOperand &SrcMO = MI.getOperand(1);
+
+ if (IsVSReg(DstMO.getReg(), MRI) && !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) || IsVSSReg(SrcMO.getReg(), MRI) ||
+ IsVSFReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ Register NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .add(SrcMO)
+ .addImm(PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) || IsVSFReg(DstMO.getReg(), MRI) ||
+ IsVSSReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ Register NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg)
+ .add(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(PPC::sub_64);
+ } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
+ IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
+ // Matches the pattern:
+ // %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
+ // %c:wacc_hirc = COPY %a:waccrc
+ // And replaces it with:
+ // %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg());
+ if (!DefMI || !DefMI->isCopy())
+ continue;
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- for (MachineInstr &MI : MBB) {
- if (!MI.isFullCopy())
+ MachineOperand &OrigSrc = DefMI->getOperand(1);
+
+ if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI))
continue;
- MachineOperand &DstMO = MI.getOperand(0);
- MachineOperand &SrcMO = MI.getOperand(1);
-
- if ( IsVSReg(DstMO.getReg(), MRI) &&
- !IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *to* a VSX register from a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass;
- assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVSSReg(SrcMO.getReg(), MRI) ||
- IsVSFReg(SrcMO.getReg(), MRI)) &&
- "Unknown source for a VSX copy");
-
- Register NewVReg = MRI.createVirtualRegister(SrcRC);
- BuildMI(MBB, MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
- .addImm(1) // add 1, not 0, because there is no implicit clearing
- // of the high bits.
- .add(SrcMO)
- .addImm(PPC::sub_64);
-
- // The source of the original copy is now the new virtual register.
- SrcMO.setReg(NewVReg);
- } else if (!IsVSReg(DstMO.getReg(), MRI) &&
- IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *from* a VSX register to a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass;
- assert((IsF8Reg(DstMO.getReg(), MRI) ||
- IsVSFReg(DstMO.getReg(), MRI) ||
- IsVSSReg(DstMO.getReg(), MRI)) &&
- "Unknown destination for a VSX copy");
-
- // Copy the VSX value into a new VSX register of the correct subclass.
- Register NewVReg = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
- NewVReg)
- .add(SrcMO);
-
- // Transform the original copy into a subregister extraction copy.
- SrcMO.setReg(NewVReg);
- SrcMO.setSubReg(PPC::sub_64);
- } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
- IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
- // Matches the pattern:
- // %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
- // %c:wacc_hirc = COPY %a:waccrc
- // And replaces it with:
- // %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
- MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg());
- if (!DefMI || !DefMI->isCopy())
- continue;
-
- MachineOperand &OrigSrc = DefMI->getOperand(1);
-
- if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI))
- continue;
-
- if (OrigSrc.getSubReg() != PPC::sub_wacc_hi)
- continue;
-
- // Rewrite the second copy to use the original register's subreg
- SrcMO.setReg(OrigSrc.getReg());
- SrcMO.setSubReg(PPC::sub_wacc_hi);
- Changed = true;
-
- // Remove the intermediate copy if safe
- if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg()))
- DefMI->eraseFromParent();
- }
- }
+ if (OrigSrc.getSubReg() != PPC::sub_wacc_hi)
+ continue;
- return Changed;
+ // Rewrite the second copy to use the original register's subreg
+ SrcMO.setReg(OrigSrc.getReg());
+ SrcMO.setSubReg(PPC::sub_wacc_hi);
+ Changed = true;
+
+ // Remove the intermediate copy if safe
+ if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg()))
+ DefMI->eraseFromParent();
+ }
}
+ return Changed;
+ }
+
public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- // If we don't have VSX on the subtarget, don't do anything.
- const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
- if (!STI.hasVSX())
- return false;
- TII = STI.getInstrInfo();
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+ TII = STI.getInstrInfo();
- bool Changed = false;
+ bool Changed = false;
- for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
- if (processBlock(B))
- Changed = true;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
+ if (processBlock(B))
+ Changed = true;
- return Changed;
- }
+ return Changed;
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
- } // end anonymous namespace
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // end anonymous namespace
-INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE,
- "PowerPC VSX Copy Legalization", false, false)
+INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE, "PowerPC VSX Copy Legalization",
+ false, false)
char PPCVSXWACCCopy::ID = 0;
-FunctionPass*
-llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); }
+FunctionPass *llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); }
|
// %a:waccrc = COPY %b.sub_wacc_hi:dmrrc | ||
// %c:wacc_hirc = COPY %a:waccrc | ||
// And replaces it with: | ||
// %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: indent off
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will run this through clang-format. Since it modifies this file significantly, I kept it for after the review is done.
; CHECK-NEXT: .cfi_def_cfa_register r30 | ||
; CHECK-NEXT: .cfi_offset r31, -8 | ||
; CHECK-NEXT: .cfi_offset r30, -16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the attribute #0
needed to reproduce the test?
adding nounwind
to the function will eliminate these
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, #0 is needed to reproduce the wacc copy, and it already contains nounwind.
This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns.