diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 17c466f38c9c3..4481375054ecf 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -174,6 +174,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@@ -235,18 +236,16 @@ class ConstantOffsetExtractor {
   /// \p UserChainTail Outputs the tail of UserChain so that we can
   /// garbage-collect unused instructions in UserChain.
   static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
-                        User *&UserChainTail, const DominatorTree *DT);
+                        User *&UserChainTail);
 
   /// Looks for a constant offset from the given GEP index without extracting
   /// it. It returns the numeric value of the extracted constant offset (0 if
   /// failed). The meaning of the arguments are the same as Extract.
-  static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
-                      const DominatorTree *DT);
+  static int64_t Find(Value *Idx, GetElementPtrInst *GEP);
 
 private:
-  ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
-      : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
-  }
+  ConstantOffsetExtractor(Instruction *InsertionPt)
+      : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()) {}
 
   /// Searches the expression that computes V for a non-zero constant C s.t.
   /// V can be reassociated into the form V' + C. If the searching is
@@ -336,7 +335,6 @@ class ConstantOffsetExtractor {
 
   Instruction *IP;
   const DataLayout &DL;
-  const DominatorTree *DT;
 };
 
 /// A pass that tries to split every GEP in the function into a variadic
@@ -519,12 +517,10 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
   }
 
   Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
-  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
-  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
-  // FIXME: this does not appear to be covered by any tests
-  // (with x86/aarch64 backends at least)
+  // Do not trace into "or" unless it is equivalent to "add".
+  // This is the case if the or's disjoint flag is set.
   if (BO->getOpcode() == Instruction::Or &&
-      !haveNoCommonBitsSet(LHS, RHS, SimplifyQuery(DL, DT, /*AC*/ nullptr, BO)))
+      !cast<PossiblyDisjointInst>(BO)->isDisjoint())
     return false;
 
   // FIXME: We don't currently support constants from the RHS of subs,
@@ -778,9 +774,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
 }
 
 Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
-                                        User *&UserChainTail,
-                                        const DominatorTree *DT) {
-  ConstantOffsetExtractor Extractor(GEP, DT);
+                                        User *&UserChainTail) {
+  ConstantOffsetExtractor Extractor(GEP);
   // Find a non-zero constant offset first.
   APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false,
                                         /* ZeroExtended */ false,
@@ -795,10 +790,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
   return IdxWithoutConstOffset;
 }
 
-int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
-                                      const DominatorTree *DT) {
+int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
   // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
-  return ConstantOffsetExtractor(GEP, DT)
+  return ConstantOffsetExtractor(GEP)
       .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
             GEP->isInBounds())
       .getSExtValue();
@@ -836,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
 
       // Tries to extract a constant offset from this GEP index.
       int64_t ConstantOffset =
-          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
+          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
       if (ConstantOffset != 0) {
         NeedsExtraction = true;
         // A GEP may have multiple indices. We accumulate the extracted
@@ -1026,7 +1020,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
       Value *OldIdx = GEP->getOperand(I);
       User *UserChainTail;
       Value *NewIdx =
-          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
+          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
       if (NewIdx != nullptr) {
         // Switches to the index with the constant offset removed.
         GEP->setOperand(I, NewIdx);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
index 0a51de3cdf20b..9e58b716adb1a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
@@ -19,17 +19,17 @@ define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspac
   %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)
 
-  %bs2 = or i32 %bs1, 1
+  %bs2 = or disjoint i32 %bs1, 1
   %ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
   %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)
 
-  %bs3 = or i32 %bs1, 2
+  %bs3 = or disjoint i32 %bs1, 2
   %ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
   %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)
 
-  %bs4 = or i32 %bs1, 3
+  %bs4 = or disjoint i32 %bs1, 3
   %ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
   %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
@@ -55,17 +55,17 @@ define amdgpu_cs void @test1_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2, i32, p
   %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)
 
-  %bs2 = or i32 %bs1, 1
+  %bs2 = or disjoint i32 %bs1, 1
   %ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
   %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)
 
-  %bs3 = or i32 %bs1, 2
+  %bs3 = or disjoint i32 %bs1, 2
   %ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
   %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)
 
-  %bs4 = or i32 %bs1, 3
+  %bs4 = or disjoint i32 %bs1, 3
   %ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
   %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)
@@ -90,17 +90,17 @@ define amdgpu_cs void @test2(i32 %arg1, <4 x i32> inreg %arg2) {
   %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)
 
-  %bs2 = or i32 %bs1, 1
+  %bs2 = or disjoint i32 %bs1, 1
   %ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
   %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)
 
-  %bs3 = or i32 %bs1, 2
+  %bs3 = or disjoint i32 %bs1, 2
   %ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
   %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)
 
-  %bs4 = or i32 %bs1, 3
+  %bs4 = or disjoint i32 %bs1, 3
   %ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
   %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
@@ -125,17 +125,17 @@ define amdgpu_cs void @test2_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2) {
   %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)
 
-  %bs2 = or i32 %bs1, 1
+  %bs2 = or disjoint i32 %bs1, 1
   %ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
   %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)
 
-  %bs3 = or i32 %bs1, 2
+  %bs3 = or disjoint i32 %bs1, 2
   %ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
   %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)
 
-  %bs4 = or i32 %bs1, 3
+  %bs4 = or disjoint i32 %bs1, 3
   %ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
   %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
index a89a2bb28b87b..8cb7d6651a08c 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
@@ -238,7 +238,7 @@ main_body:
   %25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24, !amdgpu.uniform !0
   %26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
   %27 = shl i32 %23, 2
-  %28 = or i32 %27, 3
+  %28 = or disjoint i32 %27, 3
   %29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28, !amdgpu.uniform !0
   %30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
   %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
@@ -270,7 +270,7 @@ main_body:
   %25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24
   %26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
   %27 = shl i32 %23, 2
-  %28 = or i32 %27, 3
+  %28 = or disjoint i32 %27, 3
   %29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28
   %30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
   %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
index 4202edfbd0eb4..069c57e2ae63e 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -1157,38 +1157,38 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %idx = shl i32 %idxp, 4
 
-  %i.0 = or i32 %idx, 0
+  %i.0 = or disjoint i32 %idx, 0
   %p.0 = getelementptr half, ptr addrspace(3) %p, i32 %i.0
   %x.0 = load i16, ptr addrspace(3) %p.0, align 4
   %v0p = insertelement <8 x i16> poison, i16 %x.0, i32 0
-  %i.1 = or i32 %idx, 1
+  %i.1 = or disjoint i32 %idx, 1
   %p.1 = getelementptr half, ptr addrspace(3) %p, i32 %i.1
   %x.1 = load i16, ptr addrspace(3) %p.1, align 2
   %v0 = insertelement <8 x i16> %v0p, i16 %x.1, i32 1
 
-  %i.2 = or i32 %idx, 2
+  %i.2 = or disjoint i32 %idx, 2
   %p.2 = getelementptr half, ptr addrspace(3) %p, i32 %i.2
   %x.2 = load i16, ptr addrspace(3) %p.2, align 4
   %v1p = insertelement <8 x i16> poison, i16 %x.2, i32 0
-  %i.3 = or i32 %idx, 3
+  %i.3 = or disjoint i32 %idx, 3
   %p.3 = getelementptr half, ptr addrspace(3) %p, i32 %i.3
   %x.3 = load i16, ptr addrspace(3) %p.3, align 2
   %v1 = insertelement <8 x i16> %v1p, i16 %x.3, i32 1
 
-  %i.4 = or i32 %idx, 4
+  %i.4 = or disjoint i32 %idx, 4
   %p.4 = getelementptr half, ptr addrspace(3) %p, i32 %i.4
   %x.4 = load i16, ptr addrspace(3) %p.4, align 4
   %v2p = insertelement <8 x i16> poison, i16 %x.4, i32 0
-  %i.5 = or i32 %idx, 5
+  %i.5 = or disjoint i32 %idx, 5
   %p.5 = getelementptr half, ptr addrspace(3) %p, i32 %i.5
   %x.5 = load i16, ptr addrspace(3) %p.5, align 2
   %v2 = insertelement <8 x i16> %v2p, i16 %x.5, i32 1
 
-  %i.6 = or i32 %idx, 6
+  %i.6 = or disjoint i32 %idx, 6
   %p.6 = getelementptr half, ptr addrspace(3) %p, i32 %i.6
   %x.6 = load i16, ptr addrspace(3) %p.6, align 4
   %v3p = insertelement <8 x i16> poison, i16 %x.6, i32 0
-  %i.7 = or i32 %idx, 7
+  %i.7 = or disjoint i32 %idx, 7
   %p.7 = getelementptr half, ptr addrspace(3) %p, i32 %i.7
   %x.7 = load i16, ptr addrspace(3) %p.7, align 2
   %v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 0f2b2aa4d3562..d9f6ce0b4c851 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -732,7 +732,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
   %192 = and i64 %191, 4294967168
   %193 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 %192
   %194 = shl nuw nsw i32 %178, 5
-  %195 = or i32 %194, 8
+  %195 = or disjoint i32 %194, 8
   %196 = zext i32 %195 to i64
   %197 = getelementptr inbounds i8, ptr addrspace(1) %193, i64 %196
   %198 = getelementptr inbounds i8, ptr addrspace(1) %197, i64 -4
diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll
index 672c313cf5d19..9322b9e0fe6c8 100644
--- a/llvm/test/CodeGen/NVPTX/vector-loads.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll
@@ -78,11 +78,11 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
   %t3 = shl nuw nsw i32 %t1, 9
   %ttile_origin.2 = and i32 %t3, 130560
   %tstart_offset_x_mul = shl nuw nsw i32 %t0, 1
-  %t4 = or i32 %ttile_origin.2, %tstart_offset_x_mul
-  %t6 = or i32 %t4, 1
-  %t8 = or i32 %t4, 128
+  %t4 = or disjoint i32 %ttile_origin.2, %tstart_offset_x_mul
+  %t6 = or disjoint i32 %t4, 1
+  %t8 = or disjoint i32 %t4, 128
   %t9 = zext i32 %t8 to i64
-  %t10 = or i32 %t4, 129
+  %t10 = or disjoint i32 %t4, 129
   %t11 = zext i32 %t10 to i64
   %t20 = zext i32 %t2 to i64
   %t27 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t9
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 92d67bafd5183..53b0a2737122e 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -496,7 +496,7 @@ for.body: ; preds = %for.body, %for.body
   %idxprom = zext i32 %mul to i64
   %arrayidx = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom
   %4 = load <16 x i8>, ptr %arrayidx, align 16
-  %add2 = or i32 %mul, 1
+  %add2 = or disjoint i32 %mul, 1
   %idxprom3 = zext i32 %add2 to i64
   %arrayidx4 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom3
   %5 = load <16 x i8>, ptr %arrayidx4, align 16
diff --git a/llvm/test/CodeGen/PowerPC/sched-addi.ll b/llvm/test/CodeGen/PowerPC/sched-addi.ll
index ce6679ab7bb3d..65cc47be28ee1 100644
--- a/llvm/test/CodeGen/PowerPC/sched-addi.ll
+++ b/llvm/test/CodeGen/PowerPC/sched-addi.ll
@@ -99,7 +99,7 @@ entry:
 
 vector.body:
   %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
-  %offset.idx = or i64 %index, 1
+  %offset.idx = or disjoint i64 %index, 1
   %0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0
   %1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0
   %wide.load = load <4 x double>, ptr %1, align 8
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
index 427681ac724ee..83e36eba36c84 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -157,7 +157,7 @@ main_body:
   %25 = getelementptr [0 x <8 x i32>], ptr addrspace(4) %1, i32 0, i32 %24, !amdgpu.uniform !0
   %26 = load <8 x i32>, ptr addrspace(4) %25, align 32, !invariant.load !0
   %27 = shl i32 %23, 2
-  %28 = or i32 %27, 3
+  %28 = or disjoint i32 %27, 3
   %29 = getelementptr [0 x <4 x i32>], ptr addrspace(4) %1, i32 0, i32 %28, !amdgpu.uniform !0
   %30 = load <4 x i32>, ptr addrspace(4) %29, align 16, !invariant.load !0
   %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %30, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 49c6a46b136d5..c915b9a5e59ac 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -142,7 +142,7 @@ define ptr @sext_or(i64 %a, i32 %b) {
 ;
 entry:
   %b1 = shl i32 %b, 2
-  %b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits
+  %b2 = or disjoint i32 %b1, 1 ; (b << 2) and 1 have no common bits
   %b3 = or i32 %b1, 4 ; (b << 2) and 4 may have common bits
   %b2.ext = zext i32 %b2 to i64
   %b3.ext = sext i32 %b3 to i64
@@ -335,7 +335,7 @@ define ptr @shl_add_or(i64 %a, ptr %ptr) {
 entry:
   %shl = shl i64 %a, 2
   %add = add i64 %shl, 12
-  %or = or i64 %add, 1
+  %or = or disjoint i64 %add, 1
   ; ((a << 2) + 12) and 1 have no common bits. Therefore,
   ; SeparateConstOffsetFromGEP is able to extract the 12.
   ; TODO(jingyue): We could reassociate the expression to combine 12 and 1.
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
index 45154f5a68f92..e405bbd5347ee 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
@@ -22,16 +22,17 @@ define void @testOrDoesntSplit(ptr %p) {
   ret void
 }
 
-define void @testNoBitsInCommonOrSplits(ptr %p) {
-; CHECK-LABEL: define void @testNoBitsInCommonOrSplits(
+; COM: The check for `or disjoint` replaced the old haveNoCommonBitsSet()
+; COM: check; ensure that failing to annotate an or with disjoint makes
+; COM: the optimization fail.
+define void @testNoBitsInCommonOrDoesntSplit(ptr %p) {
+; CHECK-LABEL: define void @testNoBitsInCommonOrDoesntSplit(
 ; CHECK-SAME: ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[VAR:%.*]] = tail call i64 @foo()
 ; CHECK-NEXT:    [[VAR_HIGH:%.*]] = and i64 [[VAR]], -16
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR_HIGH]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], 10
-; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[OFF:%.*]] = or i64 [[VAR_HIGH]], 10
+; CHECK-NEXT:    [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
+; CHECK-NEXT:    store i8 0, ptr [[Q]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %var = tail call i64 @foo()
@@ -46,9 +47,11 @@ define void @testDisjointOrSplits(ptr %p) {
 ; CHECK-LABEL: define void @testDisjointOrSplits(
 ; CHECK-SAME: ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[VAR:%.*]] = tail call i64 @foo()
-; CHECK-NEXT:    [[OFF:%.*]] = or disjoint i64 [[VAR]], 10
-; CHECK-NEXT:    [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
-; CHECK-NEXT:    store i8 0, ptr [[Q]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], 10
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %var = tail call i64 @foo()
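
Reviewer note, not part of the patch: a minimal sketch of the behavior being gated, distilled from the updated `testDisjointOrSplits` test above. The `disjoint` flag on an `or` asserts that no bit is set in both operands, in which case `or disjoint %x, C` computes the same value as `add %x, C`; the pass now relies on that flag instead of re-deriving the no-common-bits fact from value tracking.

```llvm
; Running something like `opt -passes=separate-const-offset-from-gep`
; on this module (adapted from split-gep-or-as-add.ll) splits the
; constant 10 out of the disjoint or, since the or acts as an add.
declare i64 @foo()

define void @testDisjointOrSplits(ptr %p) {
  %var = tail call i64 @foo()
  %off = or disjoint i64 %var, 10
  %q = getelementptr i8, ptr %p, i64 %off
  store i8 0, ptr %q, align 1
  ret void
}

; Expected result (value names are illustrative): the variable and
; constant parts of the offset become separate adds, matching the
; CHECK lines in the test:
;   %1 = ptrtoint ptr %p to i64
;   %2 = add i64 %1, %var
;   %3 = add i64 %2, 10
;   %4 = inttoptr i64 %3 to ptr
;   store i8 0, ptr %4, align 1
```

Dropping the `disjoint` keyword from `%off` leaves the GEP untouched, even when the operands provably share no bits, which is exactly what `testNoBitsInCommonOrDoesntSplit` now checks.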