Skip to content

Commit 096dd97

Browse files
committed
[DAGCombiner] infer wrap flags for trunc, use to fold itofp
1 parent e3eb16a commit 096dd97

File tree

3 files changed

+34
-15
lines changed

3 files changed

+34
-15
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16310,6 +16310,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1631016310
break;
1631116311
}
1631216312

16313+
// Use known bits to apply the nsw/nuw flags to the truncate.
16314+
const unsigned DestWidth = VT.getScalarSizeInBits();
16315+
const unsigned SrcWidth = N0.getScalarValueSizeInBits();
16316+
SDNodeFlags Flags = N->getFlags();
16317+
if (!N->getFlags().hasNoSignedWrap() &&
16318+
DAG.ComputeMaxSignificantBits(N0) <= DestWidth)
16319+
Flags.setNoSignedWrap(true);
16320+
if (!N->getFlags().hasNoUnsignedWrap() &&
16321+
DAG.MaskedValueIsZero(N0, APInt::getBitsSetFrom(SrcWidth, DestWidth)))
16322+
Flags.setNoUnsignedWrap(true);
16323+
16324+
if (!(Flags == N->getFlags())) {
16325+
N->setFlags(Flags);
16326+
return SDValue(N, 0);
16327+
}
16328+
1631316329
return SDValue();
1631416330
}
1631516331

@@ -18713,6 +18729,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
1871318729
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
1871418730
return FTrunc;
1871518731

18732+
// fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18733+
if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18734+
TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
18735+
N0.getOperand(0).getValueType()))
18736+
return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18737+
1871618738
return SDValue();
1871718739
}
1871818740

@@ -18750,6 +18772,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
1875018772
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
1875118773
return FTrunc;
1875218774

18775+
// fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18776+
if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18777+
TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
18778+
N0.getOperand(0).getValueType()))
18779+
return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18780+
1875318781
return SDValue();
1875418782
}
1875518783

llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,23 +1428,18 @@ entry:
14281428
define <4 x float> @test_uitofp_v4i8(<4 x i8> %a) {
14291429
; CHECK-LABEL: test_uitofp_v4i8(
14301430
; CHECK: {
1431-
; CHECK-NEXT: .reg .b16 %rs<5>;
14321431
; CHECK-NEXT: .reg .b32 %r<10>;
14331432
; CHECK-EMPTY:
14341433
; CHECK-NEXT: // %bb.0:
14351434
; CHECK-NEXT: ld.param.b32 %r1, [test_uitofp_v4i8_param_0];
14361435
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7773U;
1437-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
1438-
; CHECK-NEXT: cvt.rn.f32.u16 %r3, %rs1;
1436+
; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2;
14391437
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
1440-
; CHECK-NEXT: cvt.u16.u32 %rs2, %r4;
1441-
; CHECK-NEXT: cvt.rn.f32.u16 %r5, %rs2;
1438+
; CHECK-NEXT: cvt.rn.f32.u32 %r5, %r4;
14421439
; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
1443-
; CHECK-NEXT: cvt.u16.u32 %rs3, %r6;
1444-
; CHECK-NEXT: cvt.rn.f32.u16 %r7, %rs3;
1440+
; CHECK-NEXT: cvt.rn.f32.u32 %r7, %r6;
14451441
; CHECK-NEXT: prmt.b32 %r8, %r1, 0, 0x7770U;
1446-
; CHECK-NEXT: cvt.u16.u32 %rs4, %r8;
1447-
; CHECK-NEXT: cvt.rn.f32.u16 %r9, %rs4;
1442+
; CHECK-NEXT: cvt.rn.f32.u32 %r9, %r8;
14481443
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r9, %r7, %r5, %r3};
14491444
; CHECK-NEXT: ret;
14501445
%f = uitofp <4 x i8> %a to <4 x float>

llvm/test/CodeGen/NVPTX/trunc-tofp.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,13 @@ target triple = "nvptx64-nvidia-cuda"
77
define float @uitofp_trunc_nuw(i32 %x, i32 %y) {
88
; CHECK-LABEL: uitofp_trunc_nuw(
99
; CHECK: {
10-
; CHECK-NEXT: .reg .b16 %rs<2>;
1110
; CHECK-NEXT: .reg .b32 %r<5>;
1211
; CHECK-EMPTY:
1312
; CHECK-NEXT: // %bb.0:
1413
; CHECK-NEXT: ld.param.b32 %r1, [uitofp_trunc_nuw_param_0];
1514
; CHECK-NEXT: ld.param.b32 %r2, [uitofp_trunc_nuw_param_1];
1615
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
17-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
18-
; CHECK-NEXT: cvt.rn.f32.u16 %r4, %rs1;
16+
; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r3;
1917
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
2018
; CHECK-NEXT: ret;
2119
%v = add i32 %x, %y
@@ -27,15 +25,13 @@ define float @uitofp_trunc_nuw(i32 %x, i32 %y) {
2725
define float @sitofp_trunc_nsw(i32 %x, i32 %y) {
2826
; CHECK-LABEL: sitofp_trunc_nsw(
2927
; CHECK: {
30-
; CHECK-NEXT: .reg .b16 %rs<2>;
3128
; CHECK-NEXT: .reg .b32 %r<5>;
3229
; CHECK-EMPTY:
3330
; CHECK-NEXT: // %bb.0:
3431
; CHECK-NEXT: ld.param.b32 %r1, [sitofp_trunc_nsw_param_0];
3532
; CHECK-NEXT: ld.param.b32 %r2, [sitofp_trunc_nsw_param_1];
3633
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
37-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
38-
; CHECK-NEXT: cvt.rn.f32.s16 %r4, %rs1;
34+
; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r3;
3935
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
4036
; CHECK-NEXT: ret;
4137
%v = add i32 %x, %y

0 commit comments

Comments
 (0)