diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index c1b244f50d93f..b1b15e9915aea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -401,14 +401,16 @@ static Value *promoteAllocaUserToVector(
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
     TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
-    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
-      if (AccessTy->isPtrOrPtrVectorTy())
-        CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
-      else if (CurVal->getType()->isPtrOrPtrVectorTy())
-        CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
-      Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
-      Inst->replaceAllUsesWith(NewVal);
-      return nullptr;
+    if (Constant *CI = dyn_cast<Constant>(Index)) {
+      if (CI->isZeroValue() && AccessSize == VecStoreSize) {
+        if (AccessTy->isPtrOrPtrVectorTy())
+          CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
+        else if (CurVal->getType()->isPtrOrPtrVectorTy())
+          CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
+        Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
+        Inst->replaceAllUsesWith(NewVal);
+        return nullptr;
+      }
     }
 
     // Loading a subvector.
@@ -456,12 +458,14 @@ static Value *promoteAllocaUserToVector(
     // We're storing the full vector, we can handle this without knowing CurVal.
     Type *AccessTy = Val->getType();
     TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
-    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
-      if (AccessTy->isPtrOrPtrVectorTy())
-        Val = CreateTempPtrIntCast(Val, AccessTy);
-      else if (VectorTy->isPtrOrPtrVectorTy())
-        Val = CreateTempPtrIntCast(Val, VectorTy);
-      return Builder.CreateBitOrPointerCast(Val, VectorTy);
+    if (Constant *CI = dyn_cast<Constant>(Index)) {
+      if (CI->isZeroValue() && AccessSize == VecStoreSize) {
+        if (AccessTy->isPtrOrPtrVectorTy())
+          Val = CreateTempPtrIntCast(Val, AccessTy);
+        else if (VectorTy->isPtrOrPtrVectorTy())
+          Val = CreateTempPtrIntCast(Val, VectorTy);
+        return Builder.CreateBitOrPointerCast(Val, VectorTy);
+      }
     }
 
     // Storing a subvector.
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
new file mode 100644
index 0000000000000..272a9ebe536c9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Check that promoting an alloca to a vector form works correctly when a variable
+; vector index is used.
+
+define amdgpu_kernel void @non_constant_index(i32 %arg) {
+; CHECK-LABEL: define amdgpu_kernel void @non_constant_index(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[PROMOTEALLOCA:%.*]] = phi <2 x float> [ [[TMP2:%.*]], [[BB3]] ], [ undef, [[BB2:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> [[PROMOTEALLOCA]], float 0.000000e+00, i32 [[ARG]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[ARG]], 1
+; CHECK-NEXT:    [[TMP2]] = insertelement <2 x float> [[TMP0]], float 0.000000e+00, i32 [[TMP1]]
+; CHECK-NEXT:    br label [[BB3]]
+;
+bb:
+  %i = alloca [2 x float], align 4, addrspace(5)
+  br label %bb1
+
+bb1:
+  br label %bb1
+
+bb2:
+  br label %bb3
+
+bb3:
+  %i4 = getelementptr float, ptr addrspace(5) %i, i32 %arg
+  store <2 x float> zeroinitializer, ptr addrspace(5) %i4, align 8
+  br label %bb3
+}