diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e791bc4b56e52..2cc1bc9fa4a67 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -277,6 +277,15 @@ bool PointerReplacer::collectUsers() { Worklist.emplace_back(I); }; + auto TryPushInstOperand = [&](Instruction *InstOp) { + if (!UsersToReplace.contains(InstOp)) { + if (!ValuesToRevisit.insert(InstOp)) + return false; + Worklist.emplace_back(InstOp); + } + return true; + }; + PushUsersToWorklist(&Root); while (!Worklist.empty()) { Instruction *Inst = Worklist.pop_back_val(); @@ -309,12 +318,8 @@ bool PointerReplacer::collectUsers() { // incoming values. Worklist.emplace_back(PHI); for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) { - auto *IncomingValue = cast(PHI->getIncomingValue(Idx)); - if (UsersToReplace.contains(IncomingValue)) - continue; - if (!ValuesToRevisit.insert(IncomingValue)) + if (!TryPushInstOperand(cast(PHI->getIncomingValue(Idx)))) return false; - Worklist.emplace_back(IncomingValue); } } else if (auto *SI = dyn_cast(Inst)) { auto *TrueInst = dyn_cast(SI->getTrueValue()); @@ -322,8 +327,17 @@ bool PointerReplacer::collectUsers() { if (!TrueInst || !FalseInst) return false; - UsersToReplace.insert(SI); - PushUsersToWorklist(SI); + if (isAvailable(TrueInst) && isAvailable(FalseInst)) { + UsersToReplace.insert(SI); + PushUsersToWorklist(SI); + continue; + } + + // Push select back onto the stack, followed by unavailable true/false + // value. + Worklist.emplace_back(SI); + if (!TryPushInstOperand(TrueInst) || !TryPushInstOperand(FalseInst)) + return false; } else if (auto *GEP = dyn_cast(Inst)) { UsersToReplace.insert(GEP); PushUsersToWorklist(GEP); diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll index 538cc19f9722e..beb84362b7f92 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll @@ -76,4 +76,37 @@ sink: ret <2 x i64> %val.sink } +; Crashed in IC PtrReplacer because an invalid select was generated with addrspace(4) and addrspace(5) +; operands. +define amdgpu_kernel void @select_addr4_addr5(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { +; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5( +; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, i32 0, align 8, addrspace(5) + %alloca1 = alloca [12 x i8], align 16, addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) + %select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca + call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) + ret void +} + +; Same as above but with swapped operands on the select. +define amdgpu_kernel void @select_addr4_addr5_swapped(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { +; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5_swapped( +; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, i32 0, align 8, addrspace(5) + %alloca1 = alloca [12 x i8], align 16, addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) + %select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1 + call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) + ret void +} + declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0