diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 734191447d67f..5f1e38a33c891 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12843,22 +12843,21 @@ SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) { SDLoc DL(HG); EVT MemVT = HG->getMemoryVT(); + EVT DataVT = Index.getValueType(); MachineMemOperand *MMO = HG->getMemOperand(); ISD::MemIndexType IndexType = HG->getIndexType(); if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; - SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index, - HG->getScale(), HG->getIntID()}; - if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL)) + if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) || + refineIndexType(Index, IndexType, DataVT, DAG)) { + SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index, + HG->getScale(), HG->getIntID()}; return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops, MMO, IndexType); + } - EVT DataVT = Index.getValueType(); - if (refineIndexType(Index, IndexType, DataVT, DAG)) - return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops, - MMO, IndexType); return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/aarch64-histcnt-dag-combine-hang.ll b/llvm/test/CodeGen/AArch64/aarch64-histcnt-dag-combine-hang.ll new file mode 100644 index 0000000000000..da04c67aa6c5c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-histcnt-dag-combine-hang.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve2 -verify-machineinstrs < %s -o - | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; This test is reduced from a real world example that would cause the DAGCombiner to hang. 
+ +define void @histcnt_loop(ptr %0, i64 %1, ptr %2, i64 %3, i64 %4) { +; CHECK-LABEL: histcnt_loop: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, #1 // =0x1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: .LBB0_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: lsl x10, x8, #1 +; CHECK-NEXT: add x11, x0, x10 +; CHECK-NEXT: add x10, x9, x10 +; CHECK-NEXT: lsl z1.d, z1.d, #1 +; CHECK-NEXT: ld1h { z4.d }, p0/z, [x11, #1, mul vl] +; CHECK-NEXT: ld1h { z5.d }, p0/z, [x10, #1, mul vl] +; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d +; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d] +; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d +; CHECK-NEXT: ld1h { z3.d }, p0/z, [x9, x8, lsl #1] +; CHECK-NEXT: add x8, x8, x3 +; CHECK-NEXT: cmp x4, x8 +; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d] +; CHECK-NEXT: lsl z1.d, z4.d, #1 +; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d +; CHECK-NEXT: ld1h { z4.d }, p0/z, [x2, z1.d] +; CHECK-NEXT: mad z2.d, p0/m, z0.d, z4.d +; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d] +; CHECK-NEXT: lsl z1.d, z3.d, #1 +; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d +; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d] +; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d +; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d] +; CHECK-NEXT: lsl z1.d, z5.d, #1 +; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d +; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d] +; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d +; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d] +; CHECK-NEXT: b.ne .LBB0_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: ret +entry: + br label %loop + +loop: + %6 = phi i64 [ 0, %entry ], [ %15, %loop ] + %7 = getelementptr inbounds nuw i16, ptr %0, i64 %6 + %8 = getelementptr inbounds nuw i8, ptr %7, i64 %1 + %9 = load <vscale x 4 x i16>, ptr %7, align 2 + %10 = load <vscale x 4 x i16>, ptr %8, align 2 + %11 = zext <vscale x 4 x i16> %9 to <vscale x 4 x i64> + %12 = zext <vscale x 4 x i16> %10 to <vscale x 4 x i64> + %13 = getelementptr inbounds nuw [16 x i16], ptr %2, i64 0, <vscale x 4 x i64> %11 + %14 = 
getelementptr inbounds nuw [16 x i16], ptr %2, i64 0, <vscale x 4 x i64> %12 + call void @llvm.experimental.vector.histogram.add.nxv4p0.i16(<vscale x 4 x ptr> %13, i16 1, <vscale x 4 x i1> splat (i1 true)) + call void @llvm.experimental.vector.histogram.add.nxv4p0.i16(<vscale x 4 x ptr> %14, i16 1, <vscale x 4 x i1> splat (i1 true)) + %15 = add nuw i64 %6, %3 + %16 = icmp eq i64 %15, %4 + br i1 %16, label %exit, label %loop + +exit: + ret void +}