From d01533ac8cb6acc06b3321ca772fba407983f420 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Tue, 15 Jul 2025 13:27:16 +0200 Subject: [PATCH 1/3] [InstCombine]PtrReplacer: Correctly handle select with unavailable operands The testcase I added previously failed because a SelectInst with invalid operands was created (one side `addrspace(4)`, the other `addrspace(5)`). PointerReplacer needs to dig deeper if the true and/or false instructions of the select are not available. Fixes SWDEV-542957 --- .../InstCombineLoadStoreAlloca.cpp | 28 ++++++++++++++----- ...ptr-replacer-select-addrspacecast-crash.ll | 20 +++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e791bc4b56e52..2cc1bc9fa4a67 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -277,6 +277,15 @@ bool PointerReplacer::collectUsers() { Worklist.emplace_back(I); }; + auto TryPushInstOperand = [&](Instruction *InstOp) { + if (!UsersToReplace.contains(InstOp)) { + if (!ValuesToRevisit.insert(InstOp)) + return false; + Worklist.emplace_back(InstOp); + } + return true; + }; + PushUsersToWorklist(&Root); while (!Worklist.empty()) { Instruction *Inst = Worklist.pop_back_val(); @@ -309,12 +318,8 @@ bool PointerReplacer::collectUsers() { // incoming values. Worklist.emplace_back(PHI); for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) { - auto *IncomingValue = cast(PHI->getIncomingValue(Idx)); - if (UsersToReplace.contains(IncomingValue)) - continue; - if (!ValuesToRevisit.insert(IncomingValue)) + if (!TryPushInstOperand(cast(PHI->getIncomingValue(Idx)))) return false; - Worklist.emplace_back(IncomingValue); } } else if (auto *SI = dyn_cast(Inst)) { auto *TrueInst = dyn_cast(SI->getTrueValue()); @@ -322,8 +327,17 @@ bool PointerReplacer::collectUsers() { if (!TrueInst || !FalseInst) return false; - UsersToReplace.insert(SI); - PushUsersToWorklist(SI); + if (isAvailable(TrueInst) && isAvailable(FalseInst)) { + UsersToReplace.insert(SI); + PushUsersToWorklist(SI); + continue; + } + + // Push select back onto the stack, followed by unavailable true/false + // value. + Worklist.emplace_back(SI); + if (!TryPushInstOperand(TrueInst) || !TryPushInstOperand(FalseInst)) + return false; } else if (auto *GEP = dyn_cast(Inst)) { UsersToReplace.insert(GEP); PushUsersToWorklist(GEP); diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll new file mode 100644 index 0000000000000..921a72364e6f9 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s -S -o - | FileCheck %s + +; Crashed in IC PtrReplacer because an invalid select was generated with addrspace(4) and addrspace(5) +; operands. + +define amdgpu_kernel void @eggs(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { +; CHECK-LABEL: define amdgpu_kernel void @eggs( +; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, i32 0, align 8, addrspace(5) + %alloca1 = alloca [12 x i8], align 16, addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) + %select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca + call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) + ret void +} From 29cb1e3c782f6a2e089161d12dedf64b4841c0df Mon Sep 17 00:00:00 2001 From: pvanhout Date: Tue, 15 Jul 2025 14:27:01 +0200 Subject: [PATCH 2/3] test nits --- .../ptr-replacer-select-addrspacecast-crash.ll | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll index 921a72364e6f9..f359447b6b9d8 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s -S -o - | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s ; Crashed in IC PtrReplacer because an invalid select was generated with addrspace(4) and addrspace(5) ; operands. @@ -18,3 +18,18 @@ bb: call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) ret void } + +define amdgpu_kernel void @milk(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { +; CHECK-LABEL: define amdgpu_kernel void @milk( +; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, i32 0, align 8, addrspace(5) + %alloca1 = alloca [12 x i8], align 16, addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) + %select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1 + call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) + ret void +} From 0a1bb70ce0939d870cb439285f73d9fa78a7bf36 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Wed, 16 Jul 2025 09:28:16 +0200 Subject: [PATCH 3/3] update test names, move them to ptr-replace-alloca.ll --- .../InstCombine/AMDGPU/ptr-replace-alloca.ll | 33 +++++++++++++++++ ...ptr-replacer-select-addrspacecast-crash.ll | 35 ------------------- 2 files changed, 33 insertions(+), 35 deletions(-) delete mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll index 538cc19f9722e..beb84362b7f92 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll @@ -76,4 +76,37 @@ sink: ret <2 x i64> %val.sink } +; Crashed in IC PtrReplacer because an invalid select was generated with addrspace(4) and addrspace(5) +; operands. +define amdgpu_kernel void @select_addr4_addr5(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { +; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5( +; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, i32 0, align 8, addrspace(5) + %alloca1 = alloca [12 x i8], align 16, addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) + %select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca + call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) + ret void +} + +; Same as above but with swapped operands on the select. +define amdgpu_kernel void @select_addr4_addr5_swapped(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { +; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5_swapped( +; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, i32 0, align 8, addrspace(5) + %alloca1 = alloca [12 x i8], align 16, addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) + %select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1 + call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) + ret void +} + declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0 diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll deleted file mode 100644 index f359447b6b9d8..0000000000000 --- a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replacer-select-addrspacecast-crash.ll +++ /dev/null @@ -1,35 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s - -; Crashed in IC PtrReplacer because an invalid select was generated with addrspace(4) and addrspace(5) -; operands. - -define amdgpu_kernel void @eggs(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { -; CHECK-LABEL: define amdgpu_kernel void @eggs( -; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: ret void -; -bb: - %alloca = alloca i32, i32 0, align 8, addrspace(5) - %alloca1 = alloca [12 x i8], align 16, addrspace(5) - call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) - %select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca - call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) - ret void -} - -define amdgpu_kernel void @milk(ptr addrspace(4) byref([12 x i8]) align 16 %arg) { -; CHECK-LABEL: define amdgpu_kernel void @milk( -; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: ret void -; -bb: - %alloca = alloca i32, i32 0, align 8, addrspace(5) - %alloca1 = alloca [12 x i8], align 16, addrspace(5) - call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false) - %select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1 - call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false) - ret void -}