From dbece84840f3f9aaf0f07466ed45bdd6ea698254 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov
Date: Tue, 1 Jul 2025 10:53:07 -0700
Subject: [PATCH 1/4] [X86][GlobalISel] Improve carry value selection

Generally G_UADDE, G_UADDO, G_USUBE and G_USUBO are used together, and
it was enough to simply define EFLAGS. But if extractvalue is used, we
end up with a copy of EFLAGS into a GPR. Generate a SETB instruction if
there is a user other than a COPY to EFLAGS.

Closes #120029
---
 .../X86/GISel/X86InstructionSelector.cpp      | 47 +++++++++++++------
 .../lib/Target/X86/GISel/X86LegalizerInfo.cpp |  6 +--
 .../CodeGen/X86/GlobalISel/legalize-add.mir   | 20 ++++----
 .../X86/GlobalISel/legalize-leading-zeros.mir | 15 ++++--
 .../CodeGen/X86/GlobalISel/legalize-sub.mir   | 20 ++++----
 .../legalize-trailing-zeros-undef.mir         |  8 ++--
 .../GlobalISel/legalize-trailing-zeros.mir    |  8 ++--
 .../CodeGen/X86/{ => GlobalISel}/pr49087.ll   | 26 ++++++++--
 .../X86/GlobalISel/regbankselect-X32.mir      |  4 +-
 .../CodeGen/X86/GlobalISel/select-add-x32.mir |  9 ++--
 .../X86/GlobalISel/select-get-carry-bit.ll    | 21 +++++++++
 11 files changed, 124 insertions(+), 60 deletions(-)
 rename llvm/test/CodeGen/X86/{ => GlobalISel}/pr49087.ll (50%)
 create mode 100644 llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll

diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 3090ad313b90d..60bfd38a8b2ec 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -1163,14 +1163,13 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I,
           I.getOpcode() == TargetOpcode::G_USUBO) &&
          "unexpected instruction");
 
-  const Register DstReg = I.getOperand(0).getReg();
-  const Register CarryOutReg = I.getOperand(1).getReg();
-  const Register Op0Reg = I.getOperand(2).getReg();
-  const Register Op1Reg = I.getOperand(3).getReg();
-  bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE ||
-               I.getOpcode() == TargetOpcode::G_USUBO;
-  bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE ||
-                    I.getOpcode() == TargetOpcode::G_USUBE;
+  auto &CarryMI = cast<GAddSubCarryOut>(I);
+
+  const Register DstReg = CarryMI.getDstReg();
+  const Register CarryOutReg = CarryMI.getCarryOutReg();
+  const Register Op0Reg = CarryMI.getLHSReg();
+  const Register Op1Reg = CarryMI.getRHSReg();
+  bool IsSub = CarryMI.isSub();
 
   const LLT DstTy = MRI.getType(DstReg);
   assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types");
@@ -1206,14 +1205,15 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I,
     llvm_unreachable("selectUAddSub unsupported type.");
   }
 
-  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
-  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
+  const RegisterBank &CarryRB = *RBI.getRegBank(CarryOutReg, MRI, TRI);
+  const TargetRegisterClass *CarryRC =
+      getRegClass(MRI.getType(CarryOutReg), CarryRB);
 
   unsigned Opcode = IsSub ? OpSUB : OpADD;
 
   // G_UADDE/G_USUBE - find CarryIn def instruction.
- if (HasCarryIn) { - Register CarryInReg = I.getOperand(4).getReg(); + if (auto CarryInMI = dyn_cast(&I)) { + Register CarryInReg = CarryInMI->getCarryInReg(); MachineInstr *Def = MRI.getVRegDef(CarryInReg); while (Def->getOpcode() == TargetOpcode::G_TRUNC) { CarryInReg = Def->getOperand(1).getReg(); @@ -1230,7 +1230,7 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, X86::EFLAGS) .addReg(CarryInReg); - if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI)) + if (!RBI.constrainGenericRegister(CarryInReg, *CarryRC, MRI)) return false; Opcode = IsSub ? OpSBB : OpADC; @@ -1253,9 +1253,28 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, .addReg(X86::EFLAGS); if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) || - !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI)) + !RBI.constrainGenericRegister(CarryOutReg, *CarryRC, MRI)) return false; + // If there are instructions that use carry as value, we need to lower it + // differently than setting EFLAGS + Register SetCarryCC; + for (auto &Use : + llvm::make_early_inc_range(MRI.use_nodbg_operands(CarryOutReg))) { + MachineInstr *MI = Use.getParent(); + if (MI->isCopy() && MI->getOperand(0).getReg() == X86::EFLAGS) + continue; + if (!SetCarryCC) { + SetCarryCC = MRI.createGenericVirtualRegister(MRI.getType(CarryOutReg)); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), + SetCarryCC) + .addImm(X86::COND_B); + if (!RBI.constrainGenericRegister(SetCarryCC, *CarryRC, MRI)) + return false; + } + Use.setReg(SetCarryCC); + } + I.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 8e304c07ed5cb..4b2b286a8fd94 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -164,11 +164,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .scalarize(0); getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO}) - .legalFor({{s8, s1}, {s16, s1}, {s32, s1}}) - .legalFor(Is64Bit, {{s64, s1}}) + .legalFor({{s8, s8}, {s16, s8}, {s32, s8}}) + .legalFor(Is64Bit, {{s64, s8}}) .widenScalarToNextPow2(0, /*Min=*/32) .clampScalar(0, s8, sMaxScalar) - .clampScalar(1, s1, s1) + .clampScalar(1, s8, s8) .scalarize(0); // integer multiply diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir index ec9db781b1bc2..dae2ad6f3bb45 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; X64-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X64-NEXT: $rax = COPY [[UADDO]](s64) ; X64-NEXT: $rdx = COPY [[UADDE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] - ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] - ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV4]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] + ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s8) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] + ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s8) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE2]](s32), [[UADDE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir index 19fe5b84c73ce..470a30fd36b62 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir @@ -25,6 +25,7 @@ body: | ; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]] ; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]] ; X64-NEXT: RET 0, implicit [[AND1]](s64) + ; ; X86-LABEL: name: test_ctlz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64) @@ -46,12 +47,15 @@ body: | ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C]](s32) ; X86-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[MV]](s64) ; X86-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV6]], [[UV8]] + ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[USUBO1]](s8) + ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s8) = G_ZEXT [[TRUNC1]](s1) + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV7]], [[UV9]], [[ZEXT2]] + ; X86-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[USUBE1]](s8) ; X86-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) - ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s35) - ; X86-NEXT: RET 0, implicit [[ZEXT2]](s64) + ; X86-NEXT: [[TRUNC3:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) + ; X86-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC3]](s35) + ; X86-NEXT: RET 0, implicit [[ZEXT3]](s64) %0(s64) = COPY $rdx %1:_(s35) = G_TRUNC %0(s64) %2:_(s35) = G_CTLZ %1 @@ -97,6 +101,7 @@ body: | ; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_ctlz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir index ee2b9eefcb01a..ac3bf331373c9 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; X64-NEXT: 
[[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X64-NEXT: $rax = COPY [[USUBO]](s64) ; X64-NEXT: $rdx = COPY [[USUBE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV4]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] - ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV6]], [[USUBE1]] - ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV4]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] + ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s8) = G_USUBE [[UV2]], [[UV6]], [[USUBE1]] + ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s8) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE2]](s32), [[USUBE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir index 9807d13e3235a..57e729fb03ab6 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -97,8 +97,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] 
+ ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir index e2d10423dbec5..f5d847776ec06 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -99,8 +99,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ]], [[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/X86/pr49087.ll b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll similarity index 50% rename from llvm/test/CodeGen/X86/pr49087.ll rename to llvm/test/CodeGen/X86/GlobalISel/pr49087.ll index 1a29222466385..ffa02398fbf93 100644 --- a/llvm/test/CodeGen/X86/pr49087.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll @@ -1,9 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel < %s 2>&1 | FileCheck %s -; REQUIRES: asserts -; XFAIL: * define i32 @test_01(ptr %p, i64 %len, i32 %x) { -; CHECK-LABEL: test_01 +; CHECK-LABEL: test_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq %rax, %rsi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB0_4 +; CHECK-NEXT: # %bb.2: # %backedge +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: imulq $4, %rsi, %rcx +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: cmpl %edx, (%rcx) +; CHECK-NEXT: sete %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je 
.LBB0_1 +; CHECK-NEXT: # %bb.3: # %failure +; CHECK-NEXT: .LBB0_4: # %exit +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: retq entry: %scevgep = getelementptr i32, ptr %p, i64 -1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir index 8eac3eaf36145..76680ac750625 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir @@ -29,8 +29,8 @@ body: | bb.0 (%ir-block.0): %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF - %2(s1) = IMPLICIT_DEF - %3(s32), %4(s1) = G_UADDE %0, %1, %2 + %2(s8) = IMPLICIT_DEF + %3(s32), %4(s8) = G_UADDE %0, %1, %2 RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir index 773813f19cddb..1deb52e0e4a25 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir @@ -31,10 +31,10 @@ body: | ; X32: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF ; X32: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF ; X32: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags - ; X32: [[COPY:%[0-9]+]]:gr32 = COPY $eflags + ; X32: [[COPY:%[0-9]+]]:gr8 = COPY $eflags ; X32: $eflags = COPY [[COPY]] ; X32: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags - ; X32: [[COPY1:%[0-9]+]]:gr32 = COPY $eflags + ; X32: [[COPY1:%[0-9]+]]:gr8 = COPY $eflags ; X32: $eax = COPY [[ADD32rr]] ; X32: $edx = COPY [[ADC32rr]] ; X32: RET 0, implicit $eax, implicit $edx @@ -43,9 +43,8 @@ body: | %2(s32) = IMPLICIT_DEF %3(s32) = IMPLICIT_DEF %9(s8) = G_CONSTANT i8 0 - %4(s1) = G_TRUNC %9(s8) - %5(s32), %6(s1) = G_UADDE %0, %2, %4 - %7(s32), %8(s1) = G_UADDE %1, %3, %6 + %5(s32), %6(s8) = G_UADDE %0, %2, %9 + %7(s32), %8(s8) = G_UADDE %1, %3, %6 $eax = COPY %5(s32) $edx = COPY %7(s32) RET 0, implicit $eax, implicit $edx diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll new file mode 100644 index 0000000000000..f4f09ea1b16c7 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel=1 -global-isel-abort=1 | FileCheck %s + +; Issue #120029 +define i16 @use_carry_bit(i16 %2) { +; CHECK-LABEL: use_carry_bit: +; CHECK: # %bb.0: +; CHECK-NEXT: movw $1, %ax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: addw %di, %ax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: cmovnew %di, %ax +; CHECK-NEXT: retq + %4 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %2, i16 1) + %5 = extractvalue { i16, i1 } %4, 0 + %6 = extractvalue { i16, i1 } %4, 1 + %7 = select i1 %6, i16 %2, i16 %5 + ret i16 %7 +} + From 5ca565cd29316dea88961336b8c72030e4cf6a27 Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Tue, 29 Jul 2025 01:20:28 -0700 Subject: [PATCH 2/4] Always store a carry bit in a GPR --- .../X86/GISel/X86InstructionSelector.cpp | 30 ++++--------------- .../test/CodeGen/X86/GlobalISel/add-scalar.ll | 12 ++++++++ llvm/test/CodeGen/X86/GlobalISel/pr49087.ll | 2 +- .../CodeGen/X86/GlobalISel/select-add-x32.mir | 22 +++++++------- .../test/CodeGen/X86/GlobalISel/sub-scalar.ll | 12 ++++++++ 5 files changed, 42 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp 
b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 60bfd38a8b2ec..0def5ff4f170f 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -1226,9 +1226,10 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, Def->getOpcode() == TargetOpcode::G_USUBE || Def->getOpcode() == TargetOpcode::G_USUBO) { // carry set by prev ADD/SUB. - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), - X86::EFLAGS) - .addReg(CarryInReg); + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::CMP8ri)) + .addReg(CarryInReg) + .addImm(1); if (!RBI.constrainGenericRegister(CarryInReg, *CarryRC, MRI)) return false; @@ -1249,32 +1250,13 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, .addReg(Op0Reg) .addReg(Op1Reg); - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg) - .addReg(X86::EFLAGS); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), + CarryOutReg).addImm(X86::COND_B); if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) || !RBI.constrainGenericRegister(CarryOutReg, *CarryRC, MRI)) return false; - // If there are instructions that use carry as value, we need to lower it - // differently than setting EFLAGS - Register SetCarryCC; - for (auto &Use : - llvm::make_early_inc_range(MRI.use_nodbg_operands(CarryOutReg))) { - MachineInstr *MI = Use.getParent(); - if (MI->isCopy() && MI->getOperand(0).getReg() == X86::EFLAGS) - continue; - if (!SetCarryCC) { - SetCarryCC = MRI.createGenericVirtualRegister(MRI.getType(CarryOutReg)); - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), - SetCarryCC) - .addImm(X86::COND_B); - if (!RBI.constrainGenericRegister(SetCarryCC, *CarryRC, MRI)) - return false; - } - Use.setReg(SetCarryCC); - } - I.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll index 7bde1b7a7a8be..7cdfd519f04dc 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %rdi, %rax +; X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_add_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = add i128 %arg1, %arg2 ret i128 %ret @@ -46,6 +56,8 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: 
adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = add i64 %arg1, %arg2 diff --git a/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll index ffa02398fbf93..41d890bcc3c0b 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s define i32 @test_01(ptr %p, i64 %len, i32 %x) { ; CHECK-LABEL: test_01: diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir index 1deb52e0e4a25..b85180fb467eb 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir @@ -27,17 +27,17 @@ body: | bb.0 (%ir-block.0): ; X32-LABEL: name: test_add_i64 ; X32: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags - ; X32: [[COPY:%[0-9]+]]:gr8 = COPY $eflags - ; X32: $eflags = COPY [[COPY]] - ; X32: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags - ; X32: [[COPY1:%[0-9]+]]:gr8 = COPY $eflags - ; X32: $eax = COPY [[ADD32rr]] - ; X32: $edx = COPY [[ADC32rr]] - ; X32: RET 0, implicit $eax, implicit $edx + ; X32-NEXT: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags + ; X32-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: CMP8ri [[SETCCr]], 1, implicit-def $eflags + ; X32-NEXT: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags + ; X32-NEXT: [[SETCCr1:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: $eax = COPY [[ADD32rr]] + ; X32-NEXT: $edx = COPY [[ADC32rr]] + ; X32-NEXT: RET 0, implicit $eax, implicit $edx %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF %2(s32) = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll index 7a035f5e4ad4d..be75d7c3810e2 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: subq %rdx, %rax +; X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: sbbq %rcx, %rsi ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_sub_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi ; 
X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = sub i128 %arg1, %arg2 ret i128 %ret @@ -47,6 +57,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = sub i64 %arg1, %arg2 From a89f6afd57b5b539dd83424be639cd028e407fbf Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Tue, 29 Jul 2025 01:23:29 -0700 Subject: [PATCH 3/4] Some names for the test --- .../CodeGen/X86/GlobalISel/select-get-carry-bit.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll index f4f09ea1b16c7..0cf1372fed497 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll @@ -12,10 +12,10 @@ define i16 @use_carry_bit(i16 %2) { ; CHECK-NEXT: andl $1, %ecx ; CHECK-NEXT: cmovnew %di, %ax ; CHECK-NEXT: retq - %4 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %2, i16 1) - %5 = extractvalue { i16, i1 } %4, 0 - %6 = extractvalue { i16, i1 } %4, 1 - %7 = select i1 %6, i16 %2, i16 %5 - ret i16 %7 + %uadd = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %2, i16 1) + %res = extractvalue { i16, i1 } %uadd, 0 + %carry = extractvalue { i16, i1 } %uadd, 1 + %ret = select i1 %carry, i16 %2, i16 %res + ret i16 %ret } From ed72606b79f5f20531fb8e48c9cbb90dcd654437 Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Tue, 29 Jul 2025 01:26:10 -0700 Subject: [PATCH 4/4] Format --- llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 0def5ff4f170f..c3bf3fc7500e4 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -1250,8 +1250,8 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, .addReg(Op0Reg) .addReg(Op1Reg); - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), - CarryOutReg).addImm(X86::COND_B); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), CarryOutReg) + .addImm(X86::COND_B); if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) || !RBI.constrainGenericRegister(CarryOutReg, *CarryRC, MRI))